From 421265de87f1398df8c905b35cda45a0b5280112 Mon Sep 17 00:00:00 2001 From: chenghuaWang <2923277184@qq.com> Date: Wed, 31 Dec 2025 09:34:04 +0000 Subject: [PATCH 01/13] feat(qualcomm, qwen3 ptq): PTQ py script for quantize qwen3 for qualcomm NPU. --- .../backends/qualcomm/transformers/.gitignore | 1 + .../__init__.py} | 0 .../qualcomm/transformers/core/qdq.py | 53 ++ .../qualcomm/transformers/core/qlinear.py | 187 +++++++ .../qualcomm/transformers/core/rms_norm.py | 72 +++ .../transformers/core/test_qlinear.py | 89 ++++ .../qualcomm/transformers/static_qwen3.py | 458 +++++++++++++++++- 7 files changed, 858 insertions(+), 2 deletions(-) create mode 100644 pymllm/backends/qualcomm/transformers/.gitignore rename pymllm/backends/qualcomm/transformers/{static_qwen3_quantizer.py => core/__init__.py} (100%) create mode 100644 pymllm/backends/qualcomm/transformers/core/qdq.py create mode 100644 pymllm/backends/qualcomm/transformers/core/qlinear.py create mode 100644 pymllm/backends/qualcomm/transformers/core/rms_norm.py create mode 100644 pymllm/backends/qualcomm/transformers/core/test_qlinear.py diff --git a/pymllm/backends/qualcomm/transformers/.gitignore b/pymllm/backends/qualcomm/transformers/.gitignore new file mode 100644 index 000000000..198e9b7c5 --- /dev/null +++ b/pymllm/backends/qualcomm/transformers/.gitignore @@ -0,0 +1 @@ +static_one_more_thing.py diff --git a/pymllm/backends/qualcomm/transformers/static_qwen3_quantizer.py b/pymllm/backends/qualcomm/transformers/core/__init__.py similarity index 100% rename from pymllm/backends/qualcomm/transformers/static_qwen3_quantizer.py rename to pymllm/backends/qualcomm/transformers/core/__init__.py diff --git a/pymllm/backends/qualcomm/transformers/core/qdq.py b/pymllm/backends/qualcomm/transformers/core/qdq.py new file mode 100644 index 000000000..9d087a7b9 --- /dev/null +++ b/pymllm/backends/qualcomm/transformers/core/qdq.py @@ -0,0 +1,53 @@ +import torch +import torch.nn as nn +from torch.ao.quantization import 
FakeQuantize, MinMaxObserver + + +class ActivationQDQInt16PerTensorSym(nn.Module): + def __init__(self): + super().__init__() + self.fake_quant = FakeQuantize( + observer=MinMaxObserver, + quant_min=-32768, + quant_max=32767, + dtype=torch.qint32, + qscheme=torch.per_tensor_symmetric, + ) + self.enable_observer() + + def forward(self, x): + return self.fake_quant(x) + + def enable_observer(self): + self.fake_quant.enable_observer() + + def disable_observer(self): + self.fake_quant.disable_observer() + + +class ActivationQDQInt8PerTensorSym(nn.Module): + def __init__(self): + super().__init__() + self.fake_quant = FakeQuantize( + observer=MinMaxObserver, + quant_min=-128, + quant_max=127, + dtype=torch.qint32, + qscheme=torch.per_tensor_symmetric, + ) + self.enable_observer() + + def forward(self, x): + return self.fake_quant(x) + + def enable_observer(self): + self.fake_quant.enable_observer() + + def disable_observer(self): + self.fake_quant.disable_observer() + + +QDQ_OP = { + "A8-PerTensor": ActivationQDQInt8PerTensorSym, + "A16-PerTensor": ActivationQDQInt16PerTensorSym, +} diff --git a/pymllm/backends/qualcomm/transformers/core/qlinear.py b/pymllm/backends/qualcomm/transformers/core/qlinear.py new file mode 100644 index 000000000..654f05f3d --- /dev/null +++ b/pymllm/backends/qualcomm/transformers/core/qlinear.py @@ -0,0 +1,187 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch.ao.quantization import FakeQuantize, MinMaxObserver, PerChannelMinMaxObserver + + +class QLinear(nn.Module): + def __init__(self, in_features, out_features, bias=True): + super().__init__() + self.in_features = in_features + self.out_features = out_features + self.weight = nn.Parameter( + torch.randn(out_features, in_features, dtype=torch.bfloat16) + ) + if bias: + self.bias = nn.Parameter(torch.zeros(out_features, dtype=torch.bfloat16)) + else: + self.register_parameter("bias", None) + + self.act_quant = None + self.weight_quant = None + self.w_q_cache 
= None + + def _setup_status(self, already_quantized_w, already_quantized_a): + if self.act_quant: + if already_quantized_a: + self.act_quant.disable_observer() + else: + self.act_quant.enable_observer() + if self.weight_quant: + if already_quantized_w: + self.weight_quant.disable_observer() + else: + self.weight_quant.enable_observer() + + def _clear_cache(self): + self.w_q_cache = None + + +class QLinearW8A16_PerChannelSym_PerTensorSym(QLinear): + def __init__( + self, + in_features, + out_features, + bias=True, + already_quantized_weight=False, + already_quantized_activation=False, + ): + super().__init__(in_features, out_features, bias) + + self.weight_quant = FakeQuantize( + observer=PerChannelMinMaxObserver, + quant_min=-128, + quant_max=127, + dtype=torch.qint32, + qscheme=torch.per_channel_symmetric, + ch_axis=0, + ) + self._setup_status(already_quantized_weight, already_quantized_activation) + + def forward(self, x): + x_q = x + if self.w_q_cache is not None: + w_q = self.w_q_cache + else: + w_q = self.weight_quant(self.weight) + self.w_q_cache = w_q + return F.linear(x_q, w_q, self.bias) + + +class QLinearLPBQ(QLinear): + def __init__( + self, + in_features, + out_features, + bias=True, + block_size=64, + already_quantized_weight=False, + already_quantized_activation=False, + ): + super().__init__(in_features, out_features, bias) + + self.block_size = block_size + self.already_quantized_w = already_quantized_weight + + # Define buffers to store quantization parameters + # Initially set to None, populated during first forward pass, or saved to state_dict + self.register_buffer("scale_2_fp32", None) # Level 2 Scale (FP32/BF16) + self.register_buffer( + "scale_1_uint4", None + ) # Level 1 Scale Indices (Uint4 stored as Uint8) + self.register_buffer("weight_q", None) # Weight Indices (Int4 stored as Int8) + + self._setup_status(already_quantized_weight, already_quantized_activation) + + def _fake_quant_weight_double(self, w): + """ + Double quantization 
calculation (no STE, forward-only simulation) + And save quantization parameters to Buffer + """ + out_channels, in_channels = w.shape + + # 1. Padding + padding = 0 + if in_channels % self.block_size != 0: + padding = self.block_size - (in_channels % self.block_size) + w = F.pad(w, (0, padding), "constant", 0) + + # Reshape: [Out, Num_Blocks, Block_Size] + w_reshaped = w.view(out_channels, -1, self.block_size) + + # ======================================================= + # Level 1 Scale Calculation (Ideal FP32) + # ======================================================= + w_int4_max = 7.0 + # w_int4_min = -8.0 + + # [Out, Num_Blocks, 1] + w_abs_max = w_reshaped.abs().amax(dim=-1, keepdim=True) + scale_1_fp32 = w_abs_max / w_int4_max + scale_1_fp32 = torch.clamp(scale_1_fp32, min=1e-8) + + # ======================================================= + # Level 2 Scale Calculation & Level 1 Scale Quantization + # ======================================================= + s_uint4_max = 15.0 + s_uint4_min = 0.0 + + # Calculate Level 2 Scale (Per-Channel FP32) -> [Out, 1, 1] + scale_2_fp32 = scale_1_fp32.amax(dim=1, keepdim=True) / s_uint4_max + scale_2_fp32 = torch.clamp(scale_2_fp32, min=1e-8) + + # Quantize Level 1 Scale: FP32 -> Uint4 Indices + scale_1_q = torch.round(scale_1_fp32 / scale_2_fp32) + scale_1_q = torch.clamp(scale_1_q, s_uint4_min, s_uint4_max) + + # Dequantize Level 1 Scale + scale_1_recon = scale_1_q * scale_2_fp32 + + # ======================================================= + # Apply Level 1 Quantization (Quantize Weights) + # ======================================================= + w_int4_min = -8.0 + + # Quantize Weight: FP32 -> Int4 Indices + w_q = torch.round(w_reshaped / scale_1_recon) + w_q = torch.clamp(w_q, w_int4_min, w_int4_max) + + # Dequantize Weight + w_recon = w_q * scale_1_recon + + # ======================================================= + # [NEW] Store Scales and Indices + # ======================================================= + 
# Note: We store Indices here, typically converted to int8/uint8 to save space + # scale_2 itself is a floating-point number, kept as is + self.scale_2_fp32 = scale_2_fp32.detach() + # scale_1_q is 0-15, stored as uint8 + self.scale_1_uint4 = scale_1_q.detach().to(torch.uint8) + # w_q is -8 to 7, stored as int8 + self.weight_q = w_q.detach().to(torch.int8) + + # ======================================================= + # Restore Shape + # ======================================================= + w_out = w_recon.view(out_channels, -1) + if padding > 0: + w_out = w_out[:, :-padding] + + return w_out.to(torch.bfloat16) + + def forward(self, x): + x_q = x + + if self.w_q_cache is not None: + w_q = self.w_q_cache + else: + if self.already_quantized_w: + w_q = self.weight + else: + # Real-time calculation and update of self.scale_2, self.scale_1_idx, self.weight_idx + w_q = self._fake_quant_weight_double(self.weight) + + if self.use_weight_cache: + self.w_q_cache = w_q + + return F.linear(x_q, w_q, self.bias) diff --git a/pymllm/backends/qualcomm/transformers/core/rms_norm.py b/pymllm/backends/qualcomm/transformers/core/rms_norm.py new file mode 100644 index 000000000..eb9ec8d88 --- /dev/null +++ b/pymllm/backends/qualcomm/transformers/core/rms_norm.py @@ -0,0 +1,72 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch.ao.quantization import FakeQuantize, MinMaxObserver + + +class QRMSNorm(nn.Module): + """ + RMSNorm with int16 per-tensor symmetric quantized weight. + + This implementation applies quantization to the weight tensor only, + using per-tensor symmetric quantization with int16 range. 
+ """ + + def __init__( + self, + normalized_shape, + eps=1e-6, + elementwise_affine=True, + already_quantized_weight=False, + ): + super().__init__() + + if isinstance(normalized_shape, int): + normalized_shape = (normalized_shape,) + self.normalized_shape = tuple(normalized_shape) + self.eps = eps + self.already_quantized_w = already_quantized_weight + + if elementwise_affine: + self.weight = nn.Parameter( + torch.ones(normalized_shape, dtype=torch.bfloat16) + ) + else: + self.register_parameter("weight", None) + + # Weight quantization for int16 per-tensor symmetric + self.weight_quant = FakeQuantize( + observer=MinMaxObserver, + quant_min=-32768, + quant_max=32767, + dtype=torch.qint32, + qscheme=torch.per_tensor_symmetric, + ) + + self.w_q_cache = None + self.use_weight_cache = already_quantized_weight + + def _clear_cache(self): + self.w_q_cache = None + + def forward(self, x): + # Compute RMS norm + variance = x.to(torch.float32).pow(2).mean(-1, keepdim=True) + x = x * torch.rsqrt(variance + self.eps) + + # Apply quantized weight + if self.weight is not None: + if self.w_q_cache is not None: + w_q = self.w_q_cache + else: + if self.already_quantized_w: + w_q = self.weight + else: + w_q = self.weight_quant(self.weight) + + if self.use_weight_cache: + self.w_q_cache = w_q + + x = x * w_q + + return x diff --git a/pymllm/backends/qualcomm/transformers/core/test_qlinear.py b/pymllm/backends/qualcomm/transformers/core/test_qlinear.py new file mode 100644 index 000000000..69edd69f6 --- /dev/null +++ b/pymllm/backends/qualcomm/transformers/core/test_qlinear.py @@ -0,0 +1,89 @@ +import torch +import torch.nn as nn +from pymllm.backends.qualcomm.transformers.core.qlinear import QLinearLPBQ + + +def test_qlinear_lpbq(): + """ + Test QLinearLPBQ implementation against bf16 baseline. + + This test verifies that the double quantization implementation + produces results close to the bf16 baseline when using appropriate + quantization parameters. 
+ """ + # Set random seed for reproducibility + torch.manual_seed(42) + + # Test parameters + in_features = 256 + out_features = 128 + batch_size = 4 + seq_len = 16 + block_size = 64 + + # Create input tensor (bf16 baseline) + x_bf16 = torch.randn(batch_size, seq_len, in_features, dtype=torch.bfloat16) + + # Create reference linear layer (bf16) + linear_bf16 = nn.Linear(in_features, out_features, bias=True, dtype=torch.bfloat16) + # Copy weights and bias to ensure same values + with torch.no_grad(): + linear_bf16.weight.copy_( + torch.randn(out_features, in_features, dtype=torch.bfloat16) + ) + linear_bf16.bias.copy_(torch.zeros(out_features, dtype=torch.bfloat16)) + + # Get bf16 reference output + with torch.no_grad(): + output_bf16 = linear_bf16(x_bf16) + + # Create QLinearLPBQ with same weights + qlinear = QLinearLPBQ( + in_features=in_features, + out_features=out_features, + bias=True, + block_size=block_size, + already_quantized_weight=False, + already_quantized_activation=False, + ) + + # Copy the same weights and bias + with torch.no_grad(): + qlinear.weight.copy_(linear_bf16.weight.data) + if qlinear.bias is not None: + qlinear.bias.copy_(linear_bf16.bias.data) + + # Get quantized output + with torch.no_grad(): + output_q = qlinear(x_bf16) + output_q_bf16 = output_q + + # Calculate metrics + mse = torch.mean((output_bf16 - output_q_bf16) ** 2) + mae = torch.mean(torch.abs(output_bf16 - output_q_bf16)) + + # Calculate relative error + relative_error = torch.mean( + torch.abs(output_bf16 - output_q_bf16) / (torch.abs(output_bf16) + 1e-8) + ) + + # Print results + print("=== QLinearLPBQ Test Results ===") + print(f"Input shape: {x_bf16.shape}") + print(f"Output shape: {output_bf16.shape}") + print(f"Block size: {block_size}") + print("\nComparison with bf16 baseline:") + print(f"MSE: {mse:.6e}") + print(f"MAE: {mae:.6e}") + print(f"Relative Error: {relative_error:.6e}") + + # Check if results are within acceptable tolerance + # For double quantization, we 
expect some error but should be reasonable + tolerance = 0.1 # 10% relative error tolerance + + if relative_error < tolerance: + print(f"\n✓ TEST PASSED: Relative error {relative_error:.6e} < {tolerance}") + return True + else: + print(f"\n✗ TEST FAILED: Relative error {relative_error:.6e} >= {tolerance}") + return False diff --git a/pymllm/backends/qualcomm/transformers/static_qwen3.py b/pymllm/backends/qualcomm/transformers/static_qwen3.py index 64a1c25b7..1f341648b 100644 --- a/pymllm/backends/qualcomm/transformers/static_qwen3.py +++ b/pymllm/backends/qualcomm/transformers/static_qwen3.py @@ -1,3 +1,457 @@ import torch -import torchao -from torchao.quantization.pt2e.quantize_pt2e import prepare_pt2e +from torch import nn +from torch.nn import functional as F +from pymllm.backends.qualcomm.transformers.core.qdq import QDQ_OP +from pymllm.backends.qualcomm.transformers.core.rms_norm import QRMSNorm +from pymllm.backends.qualcomm.transformers.core.qlinear import ( + QLinearLPBQ, + QLinearW8A16_PerChannelSym_PerTensorSym, +) + + +class Qwen3Config: + def __init__(self): + self.attention_bias = False + self.attention_dropout = 0.0 + self.bos_token_id = 151643 + self.eos_token_id = 151645 + self.head_dim = 128 + self.hidden_act = "silu" + self.hidden_size = 2048 + self.initializer_range = 0.02 + self.intermediate_size = 6144 + self.max_position_embeddings = 40960 + self.max_window_layers = 28 + self.model_type = "qwen3" + self.num_attention_heads = 16 + self.num_hidden_layers = 28 + self.num_key_value_heads = 8 + self.pad_token_id = 151643 + self.rms_norm_eps = 1e-06 + self.rope_scaling = None + self.rope_theta = 1000000 + self.sliding_window = None + self.tie_word_embeddings = True + self.torch_dtype = "bfloat16" + self.transformers_version = "4.51.0" + self.use_cache = True + self.use_sliding_window = False + self.vocab_size = 151936 + + +def generate_rope_cache( + max_length: int, + head_dim: int, + rope_theta: float, + dtype=torch.bfloat16, + device="cpu", +): + 
""" + Generate RoPE (Rotary Position Embedding) cache for given max_length. + + Args: + max_length: Maximum sequence length + head_dim: Dimension of each attention head + rope_theta: RoPE theta parameter (frequency base) + dtype: Data type for the embeddings + device: Device to place the embeddings on + + Returns: + tuple: (cos, sin) embeddings of shape [max_length, head_dim] + """ + inv_freq = 1.0 / ( + rope_theta + ** (torch.arange(0, head_dim, 2, dtype=torch.float32, device=device) / head_dim) + ) + t = torch.arange(max_length, dtype=torch.float32, device=device) + freqs = torch.einsum("i,j->ij", t, inv_freq) + emb = torch.cat((freqs, freqs), dim=-1) + cos = emb.cos().to(dtype) + sin = emb.sin().to(dtype) + return cos, sin + + +def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor: + """ + This is the equivalent of torch.repeat_interleave(x, dim=1, repeats=n_rep). The hidden states go from (batch, + num_key_value_heads, seqlen, head_dim) to (batch, num_attention_heads, seqlen, head_dim) + """ + batch, num_key_value_heads, slen, head_dim = hidden_states.shape + if n_rep == 1: + return hidden_states + hidden_states = hidden_states[:, :, None, :, :].expand( + batch, num_key_value_heads, n_rep, slen, head_dim + ) + return hidden_states.reshape(batch, num_key_value_heads * n_rep, slen, head_dim) + + +def rotate_half(x): + """Rotates half the hidden dims of the input.""" + x1 = x[..., : x.shape[-1] // 2] + x2 = x[..., x.shape[-1] // 2 :] + return torch.cat((-x2, x1), dim=-1) + + +def apply_rotary_pos_emb(q, k, cos, sin, position_ids=None, unsqueeze_dim=1): + """Applies Rotary Position Embedding to the query and key tensors. + + Args: + q (`torch.Tensor`): The query tensor. + k (`torch.Tensor`): The key tensor. + cos (`torch.Tensor`): The cosine part of the rotary embedding. + sin (`torch.Tensor`): The sine part of the rotary embedding. + position_ids (`torch.Tensor`, *optional*): + Deprecated and unused. 
+ unsqueeze_dim (`int`, *optional*, defaults to 1): + The 'unsqueeze_dim' argument specifies the dimension along which to unsqueeze cos[position_ids] and + sin[position_ids] so that they can be properly broadcasted to the dimensions of q and k. For example, note + that cos[position_ids] and sin[position_ids] have the shape [batch_size, seq_len, head_dim]. Then, if q and + k have the shape [batch_size, heads, seq_len, head_dim], then setting unsqueeze_dim=1 makes + cos[position_ids] and sin[position_ids] broadcastable to the shapes of q and k. Similarly, if q and k have + the shape [batch_size, seq_len, heads, head_dim], then set unsqueeze_dim=2. + Returns: + `tuple(torch.Tensor)` comprising of the query and key tensors rotated using the Rotary Position Embedding. + """ + cos = cos.unsqueeze(unsqueeze_dim) + sin = sin.unsqueeze(unsqueeze_dim) + q_embed = (q * cos) + (rotate_half(q) * sin) + k_embed = (k * cos) + (rotate_half(k) * sin) + return q_embed, k_embed + + +class Qwen3MLP(nn.Module): + def __init__(self, config): + super().__init__() + self.config = config + self.hidden_size = config.hidden_size + self.intermediate_size = config.intermediate_size + self.gate_proj = QLinearLPBQ( + self.hidden_size, + self.intermediate_size, + bias=False, + block_size=32, + ) + self.up_proj = QLinearLPBQ( + self.hidden_size, + self.intermediate_size, + bias=False, + block_size=32, + ) + self.down_proj = QLinearLPBQ( + self.intermediate_size, + self.hidden_size, + bias=False, + block_size=32, + ) + self.act_fn = nn.SiLU() + + # QDQ + self.qdq_x = QDQ_OP["A16-PerTensor"]() + self.qdq_up_result = QDQ_OP["A16-PerTensor"]() + self.qdq_gate_result = QDQ_OP["A16-PerTensor"]() + self.qdq_act = QDQ_OP["A16-PerTensor"]() + self.qdq_middle = QDQ_OP["A16-PerTensor"]() + + def forward(self, x): + """ + input: + x: bf16, w/o fakequant + output: + o: bf16, w/o fakequant + """ + x = self.qdq_x(x) + up_result = self.qdq_up_result(self.up_proj(x)) + gate_result = 
self.qdq_gate_result(self.gate_proj(x)) + up_result = self.qdq_act(self.act_fn(up_result)) + o = self.qdq_middle(gate_result * up_result) + o = self.down_proj(o) + return o + + +class Qwen3Attention(nn.Module): + def __init__(self, config, layer_idx: int): + super().__init__() + self.config = config + self.layer_idx = layer_idx + self.head_dim = getattr( + config, "head_dim", config.hidden_size // config.num_attention_heads + ) + self.num_key_value_groups = ( + config.num_attention_heads // config.num_key_value_heads + ) + self.scaling = self.head_dim**-0.5 + self.q_proj = QLinearLPBQ( + config.hidden_size, + config.num_attention_heads * self.head_dim, + bias=False, + block_size=32, + ) + self.k_proj = QLinearLPBQ( + config.hidden_size, + config.num_key_value_heads * self.head_dim, + bias=False, + block_size=32, + ) + self.v_proj = QLinearLPBQ( + config.hidden_size, + config.num_key_value_heads * self.head_dim, + bias=False, + block_size=32, + ) + self.o_proj = QLinearLPBQ( + config.num_attention_heads * self.head_dim, + config.hidden_size, + bias=False, + block_size=32, + ) + self.q_norm = QRMSNorm(self.head_dim, eps=config.rms_norm_eps) + self.k_norm = QRMSNorm(self.head_dim, eps=config.rms_norm_eps) + + # QDQ + self.qdq_hidden_states = QDQ_OP["A16-PerTensor"]() + self.qdq_0 = QDQ_OP["A16-PerTensor"]() + self.qdq_1 = QDQ_OP["A16-PerTensor"]() + self.qdq_2 = QDQ_OP["A16-PerTensor"]() + self.qdq_3 = QDQ_OP["A16-PerTensor"]() + self.qdq_4 = QDQ_OP["A8-PerTensor"]() + self.qdq_5 = QDQ_OP["A16-PerTensor"]() + self.qdq_6 = QDQ_OP["A16-PerTensor"]() + self.qdq_7 = QDQ_OP["A16-PerTensor"]() + self.qdq_8 = QDQ_OP["A16-PerTensor"]() + self.qdq_9 = QDQ_OP["A16-PerTensor"]() + self.qdq_10 = QDQ_OP["A16-PerTensor"]() + self.qdq_11 = QDQ_OP["A16-PerTensor"]() + self.qdq_12 = QDQ_OP["A16-PerTensor"]() + self.qdq_13 = QDQ_OP["A16-PerTensor"]() + self.qdq_14 = QDQ_OP["A8-PerTensor"]() + + self.qdq_rope_0 = QDQ_OP["A16-PerTensor"]() + self.qdq_rope_1 = QDQ_OP["A16-PerTensor"]() + 
self.qdq_rope_2 = QDQ_OP["A16-PerTensor"]() + self.qdq_rope_3 = QDQ_OP["A16-PerTensor"]() + self.qdq_rope_4 = QDQ_OP["A16-PerTensor"]() + self.qdq_rope_5 = QDQ_OP["A16-PerTensor"]() + + def forward( + self, + hidden_states: torch.Tensor, + sin: torch.Tensor, + cos: torch.Tensor, + causal_mask: torch.Tensor, + ): + """ + input: + hidden_states: bf16, w/o fakequant + output: + o: bf16, w/o fakequant + """ + bsz, seq_len, _ = hidden_states.shape + input_shape = hidden_states.shape[:-1] + hidden_shape = (*input_shape, -1, self.head_dim) + quantized_hidden_states = self.qdq_hidden_states(hidden_states) + + # [B, H, S, D] + query_states = ( + self.q_proj(quantized_hidden_states).view(hidden_shape).transpose(1, 2) + ) + key_states = ( + self.k_proj(quantized_hidden_states).view(hidden_shape).transpose(1, 2) + ) + value_states = ( + self.v_proj(quantized_hidden_states).view(hidden_shape).transpose(1, 2) + ) + + query_states = self.q_norm(self.qdq_0(query_states)) + query_states = self.qdq_1(query_states) + + key_states = self.k_norm(self.qdq_2(key_states)) + key_states = self.qdq_3(key_states) + + # ROPE Here + # cos = cos.unsqueeze(unsqueeze_dim) + # sin = sin.unsqueeze(unsqueeze_dim) + # q_embed = (q * cos) + (rotate_half(q) * sin) + # k_embed = (k * cos) + (rotate_half(k) * sin) + cos_embedding = cos.unsqueeze(1) + sin_embedding = sin.unsqueeze(1) + rot_q = rotate_half(query_states) + rot_k = rotate_half(key_states) + query_states = self.qdq_rope_0( + self.qdq_rope_1(query_states * cos_embedding) + + self.qdq_rope_2(rot_q * sin_embedding) + ) + key_states = self.qdq_rope_3( + self.qdq_rope_4(key_states * cos_embedding) + + self.qdq_rope_5(rot_k * sin_embedding) + ) + + key_states = self.qdq_4(key_states) + key_states = key_states.transpose(2, 3) # [B, H, D, S] + key_states = repeat_kv(key_states, self.num_key_value_groups) + + attn = query_states @ key_states + attn = self.qdq_5(attn) + attn = attn / self.qdq_6(torch.ones(1, dtype=torch.bfloat16) * self.scaling) + attn 
= self.qdq_7(attn) + attn_min = torch.amin(attn, dim=-1, keepdim=True) + attn_min = self.qdq_8(attn_min) + attn_vv = attn_min - 20 + attn_vv = self.qdq_9(attn_vv) + attn = torch.where(causal_mask == 0, attn, attn_vv) + attn = self.qdq_10(attn) + attn = F.softmax(attn, -1) + attn = self.qdq_11(attn) + y = attn @ self.qdq_14(self.qdq_13(value_states)) + y = self.qdq_12(y) + y = y.transpose(1, 2).reshape(bsz, seq_len, -1) + y = self.o_proj(y) + return y + + +class Qwen3DecodeLayer(nn.Module): + def __init__(self, config, layer_idx: int): + super().__init__() + self.hidden_size = config.hidden_size + self.self_attn = Qwen3Attention(config=config, layer_idx=layer_idx) + self.mlp = Qwen3MLP(config) + self.input_layernorm = QRMSNorm(config.hidden_size, eps=config.rms_norm_eps) + self.post_attention_layernorm = QRMSNorm( + config.hidden_size, eps=config.rms_norm_eps + ) + + self.qdq_0 = QDQ_OP["A16-PerTensor"]() + self.qdq_1 = QDQ_OP["A16-PerTensor"]() + self.qdq_2 = QDQ_OP["A16-PerTensor"]() + self.qdq_3 = QDQ_OP["A16-PerTensor"]() + + def forward( + self, + hidden_states: torch.Tensor, + sin: torch.Tensor, + cos: torch.Tensor, + causal_mask: torch.Tensor, + ): + """ + inputs: + hidden_states: bf16, w/o fakequant + outputs: + hidden_states: bf16, w/o fakequant + """ + hidden_states = self.qdq_0(hidden_states) + residual = hidden_states + hidden_states = self.input_layernorm(hidden_states) + # Self Attention + hidden_states = self.self_attn( + hidden_states, + sin, + cos, + causal_mask, + ) + hidden_states = self.qdq_2(residual + self.qdq_1(hidden_states)) + + # Fully Connected + residual = hidden_states + hidden_states = self.post_attention_layernorm(hidden_states) + hidden_states = self.mlp(hidden_states) + hidden_states = residual + self.qdq_3(hidden_states) + return hidden_states + + +class Qwen3Model(nn.Module): + def __init__(self, config): + super().__init__() + self.config = config + self.padding_idx = config.pad_token_id + self.vocab_size = config.vocab_size + 
self.embed_tokens = nn.Embedding( + config.vocab_size, config.hidden_size, self.padding_idx + ) + self.layers = nn.ModuleList( + [ + Qwen3DecodeLayer(config, layer_idx) + for layer_idx in range(config.num_hidden_layers) + ] + ) + self.norm = QRMSNorm(config.hidden_size, eps=config.rms_norm_eps) + self.qdq_0 = QDQ_OP["A16-PerTensor"]() + + def forward(self, input_ids, sin, cos, causal_mask): + inputs_embeds = self.embed_tokens(input_ids) + hidden_states = inputs_embeds + + for decoder_layer in self.layers[: self.config.num_hidden_layers]: + hidden_states = decoder_layer(hidden_states, sin, cos, causal_mask) + + hidden_states = self.norm(self.qdq_0(hidden_states)) + return hidden_states + + +class Qwen3ForCausalLM: + def __init__(self, config): + self.config = config + self.model = Qwen3Model(config) + self.vocab_size = config.vocab_size + self.lm_head = QLinearW8A16_PerChannelSym_PerTensorSym( + config.hidden_size, config.vocab_size, bias=False + ) + self.qdq_0 = QDQ_OP["A16-PerTensor"]() + self.qdq_1 = QDQ_OP["A16-PerTensor"]() + self.qdq_2 = QDQ_OP["A16-PerTensor"]() + + # Register sin and cos as buffers + self.register_buffer("sin", None) + self.register_buffer("cos", None) + + self.k_cache = None + self.v_cache = None + + def forward( + self, + input_ids, + position_ids, + max_length, + ): + bsz, seq_len = input_ids.shape + + # Generate causal mask based on position_ids length + # For prefill, we need a lower triangular mask + causal_mask = 1 - torch.tril( + torch.ones(seq_len, seq_len, dtype=torch.int8, device=input_ids.device) + ) + causal_mask = causal_mask.unsqueeze(0).unsqueeze(0) # [1, 1, seq_len, seq_len] + + # Generate or use registered RoPE embeddings + if self.sin is None or self.cos is None or self.cos.shape[0] < max_length: + cos, sin = generate_rope_cache( + max_length, + head_dim=self.config.head_dim, + rope_theta=self.config.rope_theta, + dtype=torch.bfloat16, + device=input_ids.device, + ) + # Register the generated embeddings + self.sin = 
self.qdq_1(sin) + self.cos = self.qdq_2(cos) + + if self.k_cache is None or self.v_cache is None: + pass + + # Slice RoPE embeddings to current sequence length + cos = self.cos[position_ids] + sin = self.sin[position_ids] + + out = self.model(input_ids, sin, cos, causal_mask) + logits = self.lm_head(self.qdq_0(out)) + return logits + + def _update_kv_cache_by_copy(self): + pass + + def _freeze_observer(self): + pass + + def infer(self, model_path: str, prompt: str, max_length) -> str: + pass + + def calibrate(self, model_path: str, dataset_path: str): + pass From a18fe97d7ef2a8baadf6afc9a92505efad1c7e15 Mon Sep 17 00:00:00 2001 From: chenghuaWang <2923277184@qq.com> Date: Wed, 31 Dec 2025 09:38:39 +0000 Subject: [PATCH 02/13] fix: add som comments and fix causal mask decoding bug. --- .../qualcomm/transformers/static_qwen3.py | 21 ++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/pymllm/backends/qualcomm/transformers/static_qwen3.py b/pymllm/backends/qualcomm/transformers/static_qwen3.py index 1f341648b..d7a6ffb13 100644 --- a/pymllm/backends/qualcomm/transformers/static_qwen3.py +++ b/pymllm/backends/qualcomm/transformers/static_qwen3.py @@ -9,6 +9,7 @@ ) +# This settings below is for Qwen1.7B class Qwen3Config: def __init__(self): self.attention_bias = False @@ -411,14 +412,21 @@ def forward( position_ids, max_length, ): - bsz, seq_len = input_ids.shape + _, seq_len = input_ids.shape # Generate causal mask based on position_ids length # For prefill, we need a lower triangular mask - causal_mask = 1 - torch.tril( - torch.ones(seq_len, seq_len, dtype=torch.int8, device=input_ids.device) - ) - causal_mask = causal_mask.unsqueeze(0).unsqueeze(0) # [1, 1, seq_len, seq_len] + if seq_len != 1: + causal_mask = 1 - torch.tril( + torch.ones(seq_len, seq_len, dtype=torch.int8, device=input_ids.device) + ) + # [1, 1, seq_len, seq_len] + causal_mask = causal_mask.unsqueeze(0).unsqueeze(0) + else: + # [1, 1, seq_len, seq_len] + causal_mask = 
torch.zeros( + (1, 1, 1, seq_len), dtype=torch.int8, device=input_ids.device + ) # Generate or use registered RoPE embeddings if self.sin is None or self.cos is None or self.cos.shape[0] < max_length: @@ -454,4 +462,7 @@ def infer(self, model_path: str, prompt: str, max_length) -> str: pass def calibrate(self, model_path: str, dataset_path: str): + """ + calibrate Only on PREFILL stage !!! + """ pass From ca558923f4d5686d37ecfd1104cc0ad70ad92713 Mon Sep 17 00:00:00 2001 From: chenghuaWang <2923277184@qq.com> Date: Wed, 31 Dec 2025 09:54:31 +0000 Subject: [PATCH 03/13] feat: add kvcache in attention --- .../qualcomm/transformers/static_qwen3.py | 31 ++++++++++++++----- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/pymllm/backends/qualcomm/transformers/static_qwen3.py b/pymllm/backends/qualcomm/transformers/static_qwen3.py index d7a6ffb13..becbe9f48 100644 --- a/pymllm/backends/qualcomm/transformers/static_qwen3.py +++ b/pymllm/backends/qualcomm/transformers/static_qwen3.py @@ -233,6 +233,9 @@ def __init__(self, config, layer_idx: int): self.qdq_rope_4 = QDQ_OP["A16-PerTensor"]() self.qdq_rope_5 = QDQ_OP["A16-PerTensor"]() + self.k_cache = None + self.v_cache = None + def forward( self, hidden_states: torch.Tensor, @@ -287,8 +290,25 @@ def forward( ) key_states = self.qdq_4(key_states) - key_states = key_states.transpose(2, 3) # [B, H, D, S] + # [B, H, D, S] + key_states = key_states.transpose(2, 3) + # [B, H, S, D] + value_states = self.qdq_14(self.qdq_13(value_states)) + + # KV Cache Here + if seq_len > 1 and self.k_cache is not None and self.v_cache is not None: + self.k_cache = None + self.v_cache = None + + if seq_len == 1: + self.k_cache = torch.cat([self.k_cache, key_states], dim=-1) + self.v_cache = torch.cat([self.v_cache, value_states], dim=2) + else: + self.k_cache = key_states + self.v_cache = value_states + key_states = repeat_kv(key_states, self.num_key_value_groups) + value_states = repeat_kv(value_states, self.num_key_value_groups) 
attn = query_states @ key_states attn = self.qdq_5(attn) @@ -302,7 +322,7 @@ def forward( attn = self.qdq_10(attn) attn = F.softmax(attn, -1) attn = self.qdq_11(attn) - y = attn @ self.qdq_14(self.qdq_13(value_states)) + y = attn @ value_states y = self.qdq_12(y) y = y.transpose(1, 2).reshape(bsz, seq_len, -1) y = self.o_proj(y) @@ -403,9 +423,6 @@ def __init__(self, config): self.register_buffer("sin", None) self.register_buffer("cos", None) - self.k_cache = None - self.v_cache = None - def forward( self, input_ids, @@ -441,9 +458,6 @@ def forward( self.sin = self.qdq_1(sin) self.cos = self.qdq_2(cos) - if self.k_cache is None or self.v_cache is None: - pass - # Slice RoPE embeddings to current sequence length cos = self.cos[position_ids] sin = self.sin[position_ids] @@ -465,4 +479,5 @@ def calibrate(self, model_path: str, dataset_path: str): """ calibrate Only on PREFILL stage !!! """ + # Call infer after calibrate done. pass From dd58482dd5ad5b93cd48b2df02cb96f3a8fa1502 Mon Sep 17 00:00:00 2001 From: chenghuaWang <2923277184@qq.com> Date: Fri, 2 Jan 2026 15:58:28 +0000 Subject: [PATCH 04/13] feat: Qualcomm Calibrate Things --- mllm/backends/qnn/aot/QnnWrappersAPI.cpp | 1 + .../qualcomm/transformers/core/qdq.py | 59 +- .../qualcomm/transformers/core/qlinear.py | 257 +++---- .../qualcomm/transformers/core/rms_norm.py | 89 ++- .../transformers/qwen3/modeling_qwen3.py | 687 ++++++++++++++++++ .../qualcomm/transformers/qwen3/runner.py | 155 ++++ .../qualcomm/transformers/static_qwen3.py | 240 ++++-- .../backends/qualcomm/transformers/train.py | 6 + requirements-qnn-aot.txt | 1 + 9 files changed, 1224 insertions(+), 271 deletions(-) create mode 100644 pymllm/backends/qualcomm/transformers/qwen3/modeling_qwen3.py create mode 100644 pymllm/backends/qualcomm/transformers/qwen3/runner.py create mode 100644 requirements-qnn-aot.txt diff --git a/mllm/backends/qnn/aot/QnnWrappersAPI.cpp b/mllm/backends/qnn/aot/QnnWrappersAPI.cpp index 0fd354de3..0f29498f5 100644 --- 
a/mllm/backends/qnn/aot/QnnWrappersAPI.cpp +++ b/mllm/backends/qnn/aot/QnnWrappersAPI.cpp @@ -865,6 +865,7 @@ QnnAOTNodeTensor::ptr_t QnnAOTEnv::captureQnnAOTNodeTensor(const std::string& qn auto ret = QnnAOTNodeTensor::create(v, __qnn_enable_static_weight); if (__qnn_enable_static_weight) { contexts_[qnn_context_name]->static_tensor_.insert({__qnn_tensor_name, ret}); + // FIXME, That may be error. qnn_htp_func_symbols_.qnn_interface_.tensorCreateContextTensor(contexts_[qnn_context_name]->qnn_ctx_handle_, ret->getQnnTensor()); } else { diff --git a/pymllm/backends/qualcomm/transformers/core/qdq.py b/pymllm/backends/qualcomm/transformers/core/qdq.py index 9d087a7b9..c7bc351de 100644 --- a/pymllm/backends/qualcomm/transformers/core/qdq.py +++ b/pymllm/backends/qualcomm/transformers/core/qdq.py @@ -3,41 +3,35 @@ from torch.ao.quantization import FakeQuantize, MinMaxObserver -class ActivationQDQInt16PerTensorSym(nn.Module): - def __init__(self): - super().__init__() - self.fake_quant = FakeQuantize( - observer=MinMaxObserver, - quant_min=-32768, - quant_max=32767, - dtype=torch.qint32, - qscheme=torch.per_tensor_symmetric, - ) - self.enable_observer() +class ActivationQDQ(nn.Module): + """ + General activation value pseudo-quantization module (QDQ). + Supports symmetric Per-Tensor quantization, configurable bit numbers (e.g., 8-bit or 16-bit). + """ - def forward(self, x): - return self.fake_quant(x) - - def enable_observer(self): - self.fake_quant.enable_observer() - - def disable_observer(self): - self.fake_quant.disable_observer() + def __init__(self, bits=8, qscheme=torch.per_tensor_symmetric): + super().__init__() + # 1. Calculate quantization range based on bits + # int8: -128 to 127 + # int16: -32768 to 32767 + self.quant_min = -(2 ** (bits - 1)) + self.quant_max = 2 ** (bits - 1) - 1 -class ActivationQDQInt8PerTensorSym(nn.Module): - def __init__(self): - super().__init__() + # 2. 
Initialize FakeQuantize + # For activations, typically use MinMaxObserver or MovingAverageMinMaxObserver self.fake_quant = FakeQuantize( - observer=MinMaxObserver, - quant_min=-128, - quant_max=127, + observer=MinMaxObserver.with_args(qscheme=qscheme, dtype=torch.qint32), + quant_min=self.quant_min, + quant_max=self.quant_max, dtype=torch.qint32, - qscheme=torch.per_tensor_symmetric, + qscheme=qscheme, ) - self.enable_observer() def forward(self, x): + # Directly apply pseudo-quantization. + # When observer is enabled, it continuously updates scale/zp; + # When fakequant is enabled, it simulates quantization errors. return self.fake_quant(x) def enable_observer(self): @@ -46,8 +40,11 @@ def enable_observer(self): def disable_observer(self): self.fake_quant.disable_observer() + def enable_fakequant(self): + self.fake_quant.enable_fake_quant() + + def disable_fakequant(self): + self.fake_quant.disable_fake_quant() -QDQ_OP = { - "A8-PerTensor": ActivationQDQInt8PerTensorSym, - "A16-PerTensor": ActivationQDQInt16PerTensorSym, -} + def extra_repr(self): + return f"bits={self.quant_max.bit_length() + 1}, q_range=({self.quant_min}, {self.quant_max})" diff --git a/pymllm/backends/qualcomm/transformers/core/qlinear.py b/pymllm/backends/qualcomm/transformers/core/qlinear.py index 654f05f3d..bbfcc60df 100644 --- a/pymllm/backends/qualcomm/transformers/core/qlinear.py +++ b/pymllm/backends/qualcomm/transformers/core/qlinear.py @@ -1,7 +1,7 @@ import torch import torch.nn as nn import torch.nn.functional as F -from torch.ao.quantization import FakeQuantize, MinMaxObserver, PerChannelMinMaxObserver +from torch.ao.quantization import FakeQuantize, PerChannelMinMaxObserver class QLinear(nn.Module): @@ -9,179 +9,144 @@ def __init__(self, in_features, out_features, bias=True): super().__init__() self.in_features = in_features self.out_features = out_features - self.weight = nn.Parameter( - torch.randn(out_features, in_features, dtype=torch.bfloat16) - ) + self.weight = 
nn.Parameter(torch.randn(out_features, in_features)) if bias: - self.bias = nn.Parameter(torch.zeros(out_features, dtype=torch.bfloat16)) + self.bias = nn.Parameter(torch.zeros(out_features)) else: self.register_parameter("bias", None) self.act_quant = None self.weight_quant = None - self.w_q_cache = None - - def _setup_status(self, already_quantized_w, already_quantized_a): - if self.act_quant: - if already_quantized_a: - self.act_quant.disable_observer() - else: - self.act_quant.enable_observer() - if self.weight_quant: - if already_quantized_w: + + def freeze_weight(self): + """PTQ Core: Observe current weights, calculate and fix Scale/ZP""" + if self.weight_quant is not None: + # Compatible with official FakeQuantize module + if ( + isinstance(self.weight_quant, FakeQuantize) + and self.weight_quant is not None + ): + _ = self.weight_quant(self.weight) self.weight_quant.disable_observer() - else: - self.weight_quant.enable_observer() - - def _clear_cache(self): - self.w_q_cache = None - - -class QLinearW8A16_PerChannelSym_PerTensorSym(QLinear): - def __init__( - self, - in_features, - out_features, - bias=True, - already_quantized_weight=False, - already_quantized_activation=False, - ): + s = self.weight_quant.scale + print( + f"[{self.__class__.__name__}] Scale Shape: {list(s.shape)}, " + f"scale[:3]: {s.flatten()[:3].tolist()}" + ) + # Compatible with custom LPBQ logic + elif hasattr(self.weight_quant, "freeze"): + self.weight_quant.freeze(self.weight.detach()) + s = self.weight_quant.scale_2_fp32 + if s is not None: + print( + f"[{self.__class__.__name__}] LPBQ L2 Scale Shape: {list(s.shape)}, " + f"scale[:3]: {s.flatten()[:3].tolist()}" + ) + + def forward(self, x): + raise NotImplementedError + + +# --- 1. 
W8A16 Per-Channel Scheme --- +class QLinearW8A16_PerChannelSym(QLinear): + def __init__(self, in_features, out_features, bias=True): super().__init__(in_features, out_features, bias) + # Weight: Int8 Per-Channel symmetric self.weight_quant = FakeQuantize( - observer=PerChannelMinMaxObserver, + observer=PerChannelMinMaxObserver.with_args( + qscheme=torch.per_channel_symmetric, + dtype=torch.qint8, + ch_axis=0, # Quantize output channels + ), quant_min=-128, quant_max=127, - dtype=torch.qint32, + dtype=torch.qint8, qscheme=torch.per_channel_symmetric, - ch_axis=0, ) - self._setup_status(already_quantized_weight, already_quantized_activation) def forward(self, x): + # Activation quantization logic (add act_quant here if needed) x_q = x - if self.w_q_cache is not None: - w_q = self.w_q_cache - else: - w_q = self.weight_quant(self.weight) - self.w_q_cache = w_q + # Apply fake quantization: use fixed scale if frozen, otherwise update in real-time + w_q = self.weight_quant(self.weight) return F.linear(x_q, w_q, self.bias) -class QLinearLPBQ(QLinear): - def __init__( - self, - in_features, - out_features, - bias=True, - block_size=64, - already_quantized_weight=False, - already_quantized_activation=False, - ): - super().__init__(in_features, out_features, bias) +# --- 2. 
LPBQ (Double Quantization) Scheme --- +class DoubleQuantizer(nn.Module): + """ + Handles LPBQ double normalization logic to work like FakeQuantize + """ + def __init__(self, block_size=64): + super().__init__() self.block_size = block_size - self.already_quantized_w = already_quantized_weight - - # Define buffers to store quantization parameters - # Initially set to None, populated during first forward pass, or saved to state_dict - self.register_buffer("scale_2_fp32", None) # Level 2 Scale (FP32/BF16) - self.register_buffer( - "scale_1_uint4", None - ) # Level 1 Scale Indices (Uint4 stored as Uint8) - self.register_buffer("weight_q", None) # Weight Indices (Int4 stored as Int8) - - self._setup_status(already_quantized_weight, already_quantized_activation) - - def _fake_quant_weight_double(self, w): - """ - Double quantization calculation (no STE, forward-only simulation) - And save quantization parameters to Buffer - """ + self.register_buffer("is_frozen", torch.tensor(False)) + self.register_buffer("scale_2_fp32", None) + self.register_buffer("scale_1_uint4", None) + self.register_buffer("weight_q", None) + self.w_recon_cached = None # Cache dequantized weights for acceleration + + def freeze(self, w): + # Run complete double quantization and store in buffer + self.w_recon_cached = self.quantize_dequantize(w, save_buffers=True) + self.is_frozen = torch.tensor(True) + + def quantize_dequantize(self, w, save_buffers=False): out_channels, in_channels = w.shape + # 1. Padding handling + pad_len = (self.block_size - in_channels % self.block_size) % self.block_size + if pad_len > 0: + w = F.pad(w, (0, pad_len), "constant", 0) - # 1. 
Padding - padding = 0 - if in_channels % self.block_size != 0: - padding = self.block_size - (in_channels % self.block_size) - w = F.pad(w, (0, padding), "constant", 0) - - # Reshape: [Out, Num_Blocks, Block_Size] w_reshaped = w.view(out_channels, -1, self.block_size) - # ======================================================= - # Level 1 Scale Calculation (Ideal FP32) - # ======================================================= - w_int4_max = 7.0 - # w_int4_min = -8.0 - - # [Out, Num_Blocks, 1] - w_abs_max = w_reshaped.abs().amax(dim=-1, keepdim=True) - scale_1_fp32 = w_abs_max / w_int4_max - scale_1_fp32 = torch.clamp(scale_1_fp32, min=1e-8) - - # ======================================================= - # Level 2 Scale Calculation & Level 1 Scale Quantization - # ======================================================= - s_uint4_max = 15.0 - s_uint4_min = 0.0 - - # Calculate Level 2 Scale (Per-Channel FP32) -> [Out, 1, 1] - scale_2_fp32 = scale_1_fp32.amax(dim=1, keepdim=True) / s_uint4_max - scale_2_fp32 = torch.clamp(scale_2_fp32, min=1e-8) - - # Quantize Level 1 Scale: FP32 -> Uint4 Indices - scale_1_q = torch.round(scale_1_fp32 / scale_2_fp32) - scale_1_q = torch.clamp(scale_1_q, s_uint4_min, s_uint4_max) - - # Dequantize Level 1 Scale - scale_1_recon = scale_1_q * scale_2_fp32 - - # ======================================================= - # Apply Level 1 Quantization (Quantize Weights) - # ======================================================= - w_int4_min = -8.0 - - # Quantize Weight: FP32 -> Int4 Indices - w_q = torch.round(w_reshaped / scale_1_recon) - w_q = torch.clamp(w_q, w_int4_min, w_int4_max) - - # Dequantize Weight - w_recon = w_q * scale_1_recon - - # ======================================================= - # [NEW] Store Scales and Indices - # ======================================================= - # Note: We store Indices here, typically converted to int8/uint8 to save space - # scale_2 itself is a floating-point number, kept as is - 
self.scale_2_fp32 = scale_2_fp32.detach() - # scale_1_q is 0-15, stored as uint8 - self.scale_1_uint4 = scale_1_q.detach().to(torch.uint8) - # w_q is -8 to 7, stored as int8 - self.weight_q = w_q.detach().to(torch.int8) - - # ======================================================= - # Restore Shape - # ======================================================= - w_out = w_recon.view(out_channels, -1) - if padding > 0: - w_out = w_out[:, :-padding] + # Level 1: FP32 Scale + s1 = w_reshaped.abs().amax(dim=-1, keepdim=True) / 7.0 + s1 = s1.clamp(min=1e-8) - return w_out.to(torch.bfloat16) + # Level 2: Quantize S1 to Uint4 + s2 = s1.amax(dim=1, keepdim=True) / 15.0 + s2 = s2.clamp(min=1e-8) + s1_q = (s1 / s2).round().clamp(0, 15) + s1_recon = s1_q * s2 - def forward(self, x): - x_q = x + # Level 3: Quantize Weight to Int4 + w_q = (w_reshaped / s1_recon).round().clamp(-8, 7) + w_recon = w_q * s1_recon - if self.w_q_cache is not None: - w_q = self.w_q_cache - else: - if self.already_quantized_w: - w_q = self.weight - else: - # Real-time calculation and update of self.scale_2, self.scale_1_idx, self.weight_idx - w_q = self._fake_quant_weight_double(self.weight) + if save_buffers: + self.scale_2_fp32 = s2.detach() + self.scale_1_uint4 = s1_q.detach().to(torch.uint8) + self.weight_q = w_q.detach().to(torch.int8) - if self.use_weight_cache: - self.w_q_cache = w_q + # Restore shape + w_out = w_recon.view(out_channels, -1) + if pad_len > 0: + w_out = w_out[:, :-pad_len] + return w_out + + def forward(self, w): + if self.is_frozen: + # If frozen, directly return cached reconstructed weights (or real-time dequantization from Buffer) + if self.w_recon_cached is None: + # Logic to reconstruct from weight_q + scale_1 + scale_2 can be written here + pass + return ( + self.w_recon_cached + if self.w_recon_cached is not None + else self.quantize_dequantize(w) + ) + return self.quantize_dequantize(w) - return F.linear(x_q, w_q, self.bias) + +class QLinearLPBQ(QLinear): + def 
__init__(self, in_features, out_features, bias=True, block_size=64): + super().__init__(in_features, out_features, bias) + self.weight_quant = DoubleQuantizer(block_size) + + def forward(self, x): + # Must use quantized weights w_q for computation + w_q = self.weight_quant(self.weight) + return F.linear(x, w_q, self.bias) diff --git a/pymllm/backends/qualcomm/transformers/core/rms_norm.py b/pymllm/backends/qualcomm/transformers/core/rms_norm.py index eb9ec8d88..5606dafaa 100644 --- a/pymllm/backends/qualcomm/transformers/core/rms_norm.py +++ b/pymllm/backends/qualcomm/transformers/core/rms_norm.py @@ -1,72 +1,67 @@ import torch import torch.nn as nn -import torch.nn.functional as F from torch.ao.quantization import FakeQuantize, MinMaxObserver class QRMSNorm(nn.Module): - """ - RMSNorm with int16 per-tensor symmetric quantized weight. - - This implementation applies quantization to the weight tensor only, - using per-tensor symmetric quantization with int16 range. - """ - def __init__( self, normalized_shape, eps=1e-6, - elementwise_affine=True, - already_quantized_weight=False, + quant_bits=16, ): super().__init__() - + self.eps = eps if isinstance(normalized_shape, int): normalized_shape = (normalized_shape,) - self.normalized_shape = tuple(normalized_shape) - self.eps = eps - self.already_quantized_w = already_quantized_weight - if elementwise_affine: - self.weight = nn.Parameter( - torch.ones(normalized_shape, dtype=torch.bfloat16) - ) - else: - self.register_parameter("weight", None) + self.weight = nn.Parameter(torch.ones(normalized_shape)) - # Weight quantization for int16 per-tensor symmetric - self.weight_quant = FakeQuantize( - observer=MinMaxObserver, - quant_min=-32768, - quant_max=32767, + # Quantization configuration for Weight + self.weight_fake_quant = FakeQuantize( + observer=MinMaxObserver.with_args( + qscheme=torch.per_tensor_symmetric, dtype=torch.qint32 + ), + quant_min=-(2 ** (quant_bits - 1)), + quant_max=2 ** (quant_bits - 1) - 1, 
dtype=torch.qint32, qscheme=torch.per_tensor_symmetric, ) - self.w_q_cache = None - self.use_weight_cache = already_quantized_weight - - def _clear_cache(self): - self.w_q_cache = None - def forward(self, x): - # Compute RMS norm - variance = x.to(torch.float32).pow(2).mean(-1, keepdim=True) - x = x * torch.rsqrt(variance + self.eps) + # 1. RMSNorm basic logic (using float32 to ensure stability) + input_dtype = x.dtype + x_fp32 = x.float() + variance = x_fp32.pow(2).mean(-1, keepdim=True) + x_normed = x_fp32 * torch.rsqrt(variance + self.eps) - # Apply quantized weight - if self.weight is not None: - if self.w_q_cache is not None: - w_q = self.w_q_cache - else: - if self.already_quantized_w: - w_q = self.weight - else: - w_q = self.weight_quant(self.weight) + # 2. Weight fake quantization + # If observer is not closed, this step will continuously update scale/zp + # If freeze_weight() is called, this will just use fixed scale/zp for quantization + w_q = self.weight_fake_quant(self.weight) - if self.use_weight_cache: - self.w_q_cache = w_q + return (x_normed * w_q).to(input_dtype) + + @torch.no_grad() + def freeze_weight(self): + """ + Manually trigger Observer to observe and calculate scale, then lock it. + Solve the problem of output being 0 on first run. 
+ """ + self.weight_fake_quant.activation_post_process(self.weight) + s, zp = self.weight_fake_quant.activation_post_process.calculate_qparams() + self.weight_fake_quant.scale.copy_(s) + self.weight_fake_quant.zero_point.copy_(zp) + self.weight_fake_quant.disable_observer() + class_name = self.__class__.__name__ + instance_class_name = type(self).__name__ + print( + f"Class: {class_name}, Instance: {instance_class_name}, Weight Quantized: scale={self.weight_fake_quant.scale}, zp={self.weight_fake_quant.zero_point}" + ) - x = x * w_q + def disable_quant(self): + """Completely turn off quantization noise and return to floating point mode""" + self.weight_fake_quant.disable_fakequant() - return x + def extra_repr(self): + return f"{tuple(self.weight.shape)}, eps={self.variance_epsilon}" diff --git a/pymllm/backends/qualcomm/transformers/qwen3/modeling_qwen3.py b/pymllm/backends/qualcomm/transformers/qwen3/modeling_qwen3.py new file mode 100644 index 000000000..5918b5d85 --- /dev/null +++ b/pymllm/backends/qualcomm/transformers/qwen3/modeling_qwen3.py @@ -0,0 +1,687 @@ +# coding=utf-8 +# Copyright 2025 The Qwen team, Alibaba Group and the HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from typing import Callable, Optional, Union + +import torch +from torch import nn + +from transformers.activations import ACT2FN +from transformers.cache_utils import Cache, DynamicCache +from transformers.generation import GenerationMixin +from transformers.masking_utils import ( + create_causal_mask, + create_sliding_window_causal_mask, +) +from transformers.modeling_flash_attention_utils import FlashAttentionKwargs +from transformers.modeling_layers import ( + GenericForQuestionAnswering, + GenericForSequenceClassification, + GenericForTokenClassification, + GradientCheckpointingLayer, +) +from transformers.modeling_outputs import ( + BaseModelOutputWithPast, + CausalLMOutputWithPast, +) +from transformers.modeling_rope_utils import ROPE_INIT_FUNCTIONS, dynamic_rope_update +from transformers.modeling_utils import ALL_ATTENTION_FUNCTIONS, PreTrainedModel +from transformers.processing_utils import Unpack +from transformers.utils import TransformersKwargs, auto_docstring, can_return_tuple +from transformers.utils.deprecation import deprecate_kwarg +from transformers.utils.generic import check_model_inputs +from transformers.models.qwen3.configuration_qwen3 import Qwen3Config + +# Replace linear, rms_norm with: +from pymllm.backends.qualcomm.transformers.core.rms_norm import QRMSNorm +from pymllm.backends.qualcomm.transformers.core.qlinear import ( + QLinearLPBQ, + QLinearW8A16_PerChannelSym, +) +from pymllm.backends.qualcomm.transformers.core.qdq import ActivationQDQ + + +class Qwen3MLP(nn.Module): + def __init__(self, config): + super().__init__() + self.config = config + self.hidden_size = config.hidden_size + self.intermediate_size = config.intermediate_size + self.gate_proj = QLinearLPBQ( + self.hidden_size, self.intermediate_size, bias=False, block_size=32 + ) + self.up_proj = QLinearLPBQ( + self.hidden_size, self.intermediate_size, bias=False, block_size=32 + ) + self.down_proj = QLinearLPBQ( + self.intermediate_size, self.hidden_size, bias=False, 
block_size=32 + ) + self.act_fn = ACT2FN[config.hidden_act] + + # QDQ + self.up_proj_input_qdq = ActivationQDQ(bits=16) + self.up_proj_output_qdq = ActivationQDQ(bits=16) + self.gate_proj_output_qdq = ActivationQDQ(bits=16) + self.act_output_qdq = ActivationQDQ(bits=16) + self.down_proj_input_qdq = ActivationQDQ(bits=16) + + def forward(self, x): + x = self.up_proj_input_qdq(x) + up_result = self.up_proj_output_qdq(self.up_proj(x)) + gate_result = self.gate_proj_output_qdq(self.gate_proj(x)) + gate_result = self.act_output_qdq(self.act_fn(gate_result)) + o = self.down_proj_input_qdq(gate_result * up_result) + o = self.down_proj(o) + return o + + +def rotate_half(x): + """Rotates half the hidden dims of the input.""" + x1 = x[..., : x.shape[-1] // 2] + x2 = x[..., x.shape[-1] // 2 :] + return torch.cat((-x2, x1), dim=-1) + + +def apply_rotary_pos_emb(q, k, cos, sin, position_ids=None, unsqueeze_dim=1): + """Applies Rotary Position Embedding to the query and key tensors. + + Args: + q (`torch.Tensor`): The query tensor. + k (`torch.Tensor`): The key tensor. + cos (`torch.Tensor`): The cosine part of the rotary embedding. + sin (`torch.Tensor`): The sine part of the rotary embedding. + position_ids (`torch.Tensor`, *optional*): + Deprecated and unused. + unsqueeze_dim (`int`, *optional*, defaults to 1): + The 'unsqueeze_dim' argument specifies the dimension along which to unsqueeze cos[position_ids] and + sin[position_ids] so that they can be properly broadcasted to the dimensions of q and k. For example, note + that cos[position_ids] and sin[position_ids] have the shape [batch_size, seq_len, head_dim]. Then, if q and + k have the shape [batch_size, heads, seq_len, head_dim], then setting unsqueeze_dim=1 makes + cos[position_ids] and sin[position_ids] broadcastable to the shapes of q and k. Similarly, if q and k have + the shape [batch_size, seq_len, heads, head_dim], then set unsqueeze_dim=2. 
+ Returns: + `tuple(torch.Tensor)` comprising of the query and key tensors rotated using the Rotary Position Embedding. + """ + cos = cos.unsqueeze(unsqueeze_dim) + sin = sin.unsqueeze(unsqueeze_dim) + q_embed = (q * cos) + (rotate_half(q) * sin) + k_embed = (k * cos) + (rotate_half(k) * sin) + return q_embed, k_embed + + +def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor: + """ + This is the equivalent of torch.repeat_interleave(x, dim=1, repeats=n_rep). The hidden states go from (batch, + num_key_value_heads, seqlen, head_dim) to (batch, num_attention_heads, seqlen, head_dim) + """ + batch, num_key_value_heads, slen, head_dim = hidden_states.shape + if n_rep == 1: + return hidden_states + hidden_states = hidden_states[:, :, None, :, :].expand( + batch, num_key_value_heads, n_rep, slen, head_dim + ) + return hidden_states.reshape(batch, num_key_value_heads * n_rep, slen, head_dim) + + +class Qwen3Attention(nn.Module): + """Multi-headed attention from 'Attention Is All You Need' paper""" + + def __init__(self, config: Qwen3Config, layer_idx: int): + super().__init__() + self.config = config + self.layer_idx = layer_idx + self.head_dim = getattr( + config, "head_dim", config.hidden_size // config.num_attention_heads + ) + self.num_key_value_groups = ( + config.num_attention_heads // config.num_key_value_heads + ) + self.scaling = self.head_dim**-0.5 + self.attention_dropout = config.attention_dropout + self.is_causal = True + + self.q_proj = QLinearLPBQ( + config.hidden_size, + config.num_attention_heads * self.head_dim, + bias=config.attention_bias, + block_size=32, + ) + self.k_proj = QLinearLPBQ( + config.hidden_size, + config.num_key_value_heads * self.head_dim, + bias=config.attention_bias, + block_size=32, + ) + self.v_proj = QLinearLPBQ( + config.hidden_size, + config.num_key_value_heads * self.head_dim, + bias=config.attention_bias, + block_size=32, + ) + self.o_proj = QLinearLPBQ( + config.num_attention_heads * self.head_dim, + 
config.hidden_size, + bias=config.attention_bias, + block_size=32, + ) + self.q_norm = QRMSNorm( + self.head_dim, eps=config.rms_norm_eps, quant_bits=16 + ) # unlike olmo, only on the head dim! + self.k_norm = QRMSNorm( + self.head_dim, eps=config.rms_norm_eps, quant_bits=16 + ) # thus post q_norm does not need reshape + self.sliding_window = ( + config.sliding_window + if config.layer_types[layer_idx] == "sliding_attention" + else None + ) + + # QDQ + self.q_proj_input_qdq = ActivationQDQ(bits=16) + self.q_norm_input_qdq = ActivationQDQ(bits=16) + self.q_norm_output_qdq = ActivationQDQ(bits=16) + self.k_norm_input_qdq = ActivationQDQ(bits=16) + self.k_norm_output_qdq = ActivationQDQ(bits=16) + self.q_rope_mul_0_output_qdq = ActivationQDQ(bits=16) + self.q_rope_mul_1_output_qdq = ActivationQDQ(bits=16) + self.q_rope_add_0_output_qdq = ActivationQDQ(bits=16) + self.k_rope_mul_0_output_qdq = ActivationQDQ(bits=16) + self.k_rope_mul_1_output_qdq = ActivationQDQ(bits=16) + self.k_rope_add_0_output_qdq = ActivationQDQ(bits=16) + self.k_cast_to_int8_qdq = ActivationQDQ(bits=8) + self.v_cast_to_int8_qdq = ActivationQDQ(bits=8) + self.qk_matmul_output_qdq = ActivationQDQ(bits=16) + self.scaling_qdq = ActivationQDQ(bits=16) + self.reduce_min_output_qdq = ActivationQDQ(bits=16) + self.minus_0_output_qdq = ActivationQDQ(bits=16) + self.softmax_output_qdq = ActivationQDQ(bits=16) + self.attn_value_matmul_output_qdq = ActivationQDQ(bits=16) + + @deprecate_kwarg("past_key_value", new_name="past_key_values", version="4.58") + def forward( + self, + hidden_states: torch.Tensor, + position_embeddings: tuple[torch.Tensor, torch.Tensor], + attention_mask: Optional[torch.Tensor], + past_key_values: Optional[Cache] = None, + cache_position: Optional[torch.LongTensor] = None, + **kwargs: Unpack[FlashAttentionKwargs], + ) -> tuple[torch.Tensor, Optional[torch.Tensor]]: + input_shape = hidden_states.shape[:-1] + hidden_shape = (*input_shape, -1, self.head_dim) + + hidden_states = 
self.q_proj_input_qdq(hidden_states) + + query_states = self.q_norm( + self.q_norm_input_qdq(self.q_proj(hidden_states)).view(hidden_shape) + ).transpose(1, 2) + key_states = self.k_norm( + self.k_norm_input_qdq(self.k_proj(hidden_states)).view(hidden_shape) + ).transpose(1, 2) + value_states = self.v_proj(hidden_states).view(hidden_shape).transpose(1, 2) + + query_states = self.q_norm_output_qdq(query_states) + key_states = self.k_norm_output_qdq(key_states) + + cos, sin = position_embeddings + cos = cos.unsqueeze(1) + sin = sin.unsqueeze(1) + query_states = self.q_rope_add_0_output_qdq( + self.q_rope_mul_0_output_qdq(query_states * cos) + + self.q_rope_mul_1_output_qdq(rotate_half(query_states) * sin) + ) + key_states = self.k_rope_add_0_output_qdq( + self.k_rope_mul_0_output_qdq(key_states * cos) + + self.k_rope_mul_1_output_qdq(rotate_half(key_states) * sin) + ) + + key_states = self.k_cast_to_int8_qdq(key_states) + value_states = self.v_cast_to_int8_qdq(value_states) + + if past_key_values is not None: + # sin and cos are specific to RoPE models; cache_position needed for the static cache + cache_kwargs = {"sin": sin, "cos": cos, "cache_position": cache_position} + key_states, value_states = past_key_values.update( + key_states, value_states, self.layer_idx, cache_kwargs + ) + + key_states = repeat_kv(key_states, self.num_key_value_groups) + value_states = repeat_kv(value_states, self.num_key_value_groups) + + attn_weights = self.qk_matmul_output_qdq( + torch.matmul(query_states, key_states.transpose(2, 3)) + ) * self.scaling_qdq( + torch.ones(1, dtype=torch.bfloat16, device=value_states.device) + * self.scaling + ) + + attn_min = self.reduce_min_output_qdq( + torch.amin(attn_weights, dim=-1, keepdim=True) + ) + attn_vv = self.minus_0_output_qdq(attn_min - 20) + attn_weights = torch.where(attention_mask == 0, attn_weights, attn_vv) + + attn_weights = self.softmax_output_qdq( + nn.functional.softmax(attn_weights, dim=-1, dtype=torch.float32).to( + 
query_states.dtype + ) + ) + attn_output = self.attn_value_matmul_output_qdq( + torch.matmul(attn_weights, value_states) + ) + attn_output = attn_output.transpose(1, 2).contiguous() + + attn_output = attn_output.reshape(*input_shape, -1).contiguous() + attn_output = self.o_proj(attn_output) + return attn_output, attn_weights + + +class Qwen3DecoderLayer(GradientCheckpointingLayer): + def __init__(self, config: Qwen3Config, layer_idx: int): + super().__init__() + self.hidden_size = config.hidden_size + + self.self_attn = Qwen3Attention(config=config, layer_idx=layer_idx) + + self.mlp = Qwen3MLP(config) + self.input_layernorm = QRMSNorm( + config.hidden_size, eps=config.rms_norm_eps, quant_bits=16 + ) + self.post_attention_layernorm = QRMSNorm( + config.hidden_size, eps=config.rms_norm_eps, quant_bits=16 + ) + self.attention_type = config.layer_types[layer_idx] + + # QDQ + self.input_layernorm_input_qdq = ActivationQDQ(bits=16) + self.add_0_lhs_input_qdq = ActivationQDQ(bits=16) + self.add_0_output_qdq = ActivationQDQ(bits=16) + self.add_1_lhs_input_qdq = ActivationQDQ(bits=16) + + @deprecate_kwarg("past_key_value", new_name="past_key_values", version="4.58") + def forward( + self, + hidden_states: torch.Tensor, + attention_mask: Optional[torch.Tensor] = None, + position_ids: Optional[torch.LongTensor] = None, + past_key_values: Optional[Cache] = None, + use_cache: Optional[bool] = False, + cache_position: Optional[torch.LongTensor] = None, + position_embeddings: Optional[ + tuple[torch.Tensor, torch.Tensor] + ] = None, # necessary, but kept here for BC + **kwargs: Unpack[TransformersKwargs], + ) -> torch.Tensor: + hidden_states = self.input_layernorm_input_qdq(hidden_states) + residual = hidden_states + hidden_states = self.input_layernorm(hidden_states) + # Self Attention + hidden_states, _ = self.self_attn( + hidden_states=hidden_states, + attention_mask=attention_mask, + position_ids=position_ids, + past_key_values=past_key_values, + use_cache=use_cache, + 
cache_position=cache_position, + position_embeddings=position_embeddings, + **kwargs, + ) + hidden_states = self.add_0_output_qdq( + residual + self.add_0_lhs_input_qdq(hidden_states) + ) + + # Fully Connected + residual = hidden_states + hidden_states = self.post_attention_layernorm(hidden_states) + hidden_states = self.mlp(hidden_states) + hidden_states = residual + self.add_1_lhs_input_qdq(hidden_states) + return hidden_states + + +@auto_docstring +class Qwen3PreTrainedModel(PreTrainedModel): + config: Qwen3Config + base_model_prefix = "model" + supports_gradient_checkpointing = True + _no_split_modules = ["Qwen3DecoderLayer"] + _skip_keys_device_placement = ["past_key_values"] + _supports_flash_attn = True + _supports_sdpa = True + _supports_flex_attn = True + + _can_compile_fullgraph = True + _supports_attention_backend = True + _can_record_outputs = { + "hidden_states": Qwen3DecoderLayer, + "attentions": Qwen3Attention, + } + + +class Qwen3RotaryEmbedding(nn.Module): + inv_freq: torch.Tensor # fix linting for `register_buffer` + + def __init__(self, config: Qwen3Config, device=None): + super().__init__() + # BC: "rope_type" was originally "type" + if hasattr(config, "rope_scaling") and isinstance(config.rope_scaling, dict): + self.rope_type = config.rope_scaling.get( + "rope_type", config.rope_scaling.get("type") + ) + else: + self.rope_type = "default" + self.max_seq_len_cached = config.max_position_embeddings + self.original_max_seq_len = config.max_position_embeddings + + self.config = config + self.rope_init_fn = ROPE_INIT_FUNCTIONS[self.rope_type] + + inv_freq, self.attention_scaling = self.rope_init_fn(self.config, device) + self.register_buffer("inv_freq", inv_freq, persistent=False) + self.original_inv_freq = self.inv_freq + + @torch.no_grad() + @dynamic_rope_update # power user: used with advanced RoPE types (e.g. 
dynamic rope) + def forward(self, x, position_ids): + inv_freq_expanded = ( + self.inv_freq[None, :, None] + .float() + .expand(position_ids.shape[0], -1, 1) + .to(x.device) + ) + position_ids_expanded = position_ids[:, None, :].float() + + device_type = ( + x.device.type + if isinstance(x.device.type, str) and x.device.type != "mps" + else "cpu" + ) + with torch.autocast(device_type=device_type, enabled=False): # Force float32 + freqs = ( + inv_freq_expanded.float() @ position_ids_expanded.float() + ).transpose(1, 2) + emb = torch.cat((freqs, freqs), dim=-1) + cos = emb.cos() * self.attention_scaling + sin = emb.sin() * self.attention_scaling + + return cos.to(dtype=x.dtype), sin.to(dtype=x.dtype) + + +@auto_docstring +class Qwen3Model(Qwen3PreTrainedModel): + def __init__(self, config: Qwen3Config): + super().__init__(config) + self.padding_idx = config.pad_token_id + self.vocab_size = config.vocab_size + + self.embed_tokens = nn.Embedding( + config.vocab_size, config.hidden_size, self.padding_idx + ) + self.layers = nn.ModuleList( + [ + Qwen3DecoderLayer(config, layer_idx) + for layer_idx in range(config.num_hidden_layers) + ] + ) + self.norm = QRMSNorm(config.hidden_size, eps=config.rms_norm_eps, quant_bits=16) + self.rotary_emb = Qwen3RotaryEmbedding(config=config) + self.gradient_checkpointing = False + self.has_sliding_layers = "sliding_attention" in self.config.layer_types + + # Register sin and cos as buffers + self.register_buffer("mllm_max_sin_embedding", None) + self.register_buffer("mllm_max_cos_embedding", None) + self.sin_embedding_input_qdq = ActivationQDQ(bits=16) + self.cos_embedding_input_qdq = ActivationQDQ(bits=16) + + # Initialize weights and apply final processing + self.post_init() + + @check_model_inputs() + @auto_docstring + def forward( + self, + input_ids: Optional[torch.LongTensor] = None, + attention_mask: Optional[torch.Tensor] = None, + position_ids: Optional[torch.LongTensor] = None, + past_key_values: Optional[Cache] = None, + 
inputs_embeds: Optional[torch.FloatTensor] = None, + use_cache: Optional[bool] = None, + cache_position: Optional[torch.LongTensor] = None, + **kwargs: Unpack[TransformersKwargs], + ) -> BaseModelOutputWithPast: + if (input_ids is None) ^ (inputs_embeds is not None): + raise ValueError( + "You must specify exactly one of input_ids or inputs_embeds" + ) + + if inputs_embeds is None: + inputs_embeds = self.embed_tokens(input_ids) + + if use_cache and past_key_values is None: + past_key_values = DynamicCache(config=self.config) + + if cache_position is None: + past_seen_tokens = ( + past_key_values.get_seq_length() if past_key_values is not None else 0 + ) + cache_position = torch.arange( + past_seen_tokens, + past_seen_tokens + inputs_embeds.shape[1], + device=inputs_embeds.device, + ) + + if position_ids is None: + position_ids = cache_position.unsqueeze(0) + + # It may already have been prepared by e.g. `generate` + if not isinstance(causal_mask_mapping := attention_mask, dict): + # Prepare mask arguments + mask_kwargs = { + "config": self.config, + "input_embeds": inputs_embeds, + "attention_mask": attention_mask, + "cache_position": cache_position, + "past_key_values": past_key_values, + "position_ids": position_ids, + } + # Create the masks + causal_mask_mapping = { + "full_attention": create_causal_mask(**mask_kwargs), + } + # The sliding window alternating layers are not always activated depending on the config + if self.has_sliding_layers: + causal_mask_mapping["sliding_attention"] = ( + create_sliding_window_causal_mask(**mask_kwargs) + ) + + hidden_states = inputs_embeds + + if self.mllm_max_sin_embedding is None and self.mllm_max_cos_embedding is None: + mllm_qualcomm_max_length = kwargs.get("mllm_qualcomm_max_length", None) + assert mllm_qualcomm_max_length is not None + max_position_ids = torch.arange( + 0, + mllm_qualcomm_max_length, + dtype=position_ids.dtype, + device=position_ids.device, + ).unsqueeze(0) + self.mllm_max_cos_embedding, 
self.mllm_max_sin_embedding = self.rotary_emb( + hidden_states, max_position_ids + ) + self.mllm_max_cos_embedding = self.cos_embedding_input_qdq( + self.mllm_max_cos_embedding + ) + self.mllm_max_sin_embedding = self.sin_embedding_input_qdq( + self.mllm_max_sin_embedding + ) + + # create position embeddings to be shared across the decoder layers + position_embeddings = ( + self.mllm_max_cos_embedding[:, position_ids.squeeze(0), :], + self.mllm_max_sin_embedding[:, position_ids.squeeze(0), :], + ) + + # Generate causal mask based on position_ids length + # For prefill, we need a lower triangular mask + _, seq_len = input_ids.shape + if seq_len != 1: + causal_mask = 1 - torch.tril( + torch.ones(seq_len, seq_len, dtype=torch.int8, device=input_ids.device) + ) + # [1, 1, seq_len, seq_len] + causal_mask = causal_mask.unsqueeze(0).unsqueeze(0) + else: + # [1, 1, seq_len, seq_len] + causal_mask = torch.zeros( + (1, 1, 1, seq_len), dtype=torch.int8, device=input_ids.device + ) + + for decoder_layer in self.layers[: self.config.num_hidden_layers]: + hidden_states = decoder_layer( + hidden_states, + attention_mask=causal_mask, + position_ids=position_ids, + past_key_values=past_key_values, + use_cache=use_cache, + cache_position=cache_position, + position_embeddings=position_embeddings, + **kwargs, + ) + + hidden_states = self.norm(hidden_states) + return BaseModelOutputWithPast( + last_hidden_state=hidden_states, + past_key_values=past_key_values if use_cache else None, + ) + + +@auto_docstring +class Qwen3ForCausalLM(Qwen3PreTrainedModel, GenerationMixin): + _tied_weights_keys = ["lm_head.weight"] + _tp_plan = {"lm_head": "colwise_rep"} + _pp_plan = {"lm_head": (["hidden_states"], ["logits"])} + + def __init__(self, config): + super().__init__(config) + self.model = Qwen3Model(config) + self.vocab_size = config.vocab_size + self.lm_head = QLinearW8A16_PerChannelSym( + config.hidden_size, config.vocab_size, bias=False + ) + self.mllm_qualcomm_max_length = None + + # 
Initialize weights and apply final processing + self.post_init() + + @can_return_tuple + @auto_docstring + def forward( + self, + input_ids: Optional[torch.LongTensor] = None, + attention_mask: Optional[torch.Tensor] = None, + position_ids: Optional[torch.LongTensor] = None, + past_key_values: Optional[Cache] = None, + inputs_embeds: Optional[torch.FloatTensor] = None, + labels: Optional[torch.LongTensor] = None, + use_cache: Optional[bool] = None, + cache_position: Optional[torch.LongTensor] = None, + logits_to_keep: Union[int, torch.Tensor] = 0, + **kwargs: Unpack[TransformersKwargs], + ) -> CausalLMOutputWithPast: + r""" + labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*): + Labels for computing the masked language modeling loss. Indices should either be in `[0, ..., + config.vocab_size]` or -100 (see `input_ids` docstring). Tokens with indices set to `-100` are ignored + (masked), the loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`. + + Example: + + ```python + >>> from transformers import AutoTokenizer, Qwen3ForCausalLM + + >>> model = Qwen3ForCausalLM.from_pretrained("Qwen/Qwen3-8B") + >>> tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen3-8B") + + >>> prompt = "Hey, are you conscious? Can you talk to me?" + >>> inputs = tokenizer(prompt, return_tensors="pt") + + >>> # Generate + >>> generate_ids = model.generate(inputs.input_ids, max_length=30) + >>> tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0] + "Hey, are you conscious? Can you talk to me?\nI'm not conscious, but I can talk to you." 
+ ```""" + kwargs.update({"mllm_qualcomm_max_length": self.mllm_qualcomm_max_length}) + outputs: BaseModelOutputWithPast = self.model( + input_ids=input_ids, + attention_mask=attention_mask, + position_ids=position_ids, + past_key_values=past_key_values, + inputs_embeds=inputs_embeds, + use_cache=use_cache, + cache_position=cache_position, + **kwargs, + ) + + hidden_states = outputs.last_hidden_state + # Only compute necessary logits, and do not upcast them to float if we are not computing the loss + slice_indices = ( + slice(-logits_to_keep, None) + if isinstance(logits_to_keep, int) + else logits_to_keep + ) + logits = self.lm_head(hidden_states[:, slice_indices, :]) + + loss = None + if labels is not None: + loss = self.loss_function( + logits=logits, + labels=labels, + vocab_size=self.config.vocab_size, + **kwargs, + ) + + return CausalLMOutputWithPast( + loss=loss, + logits=logits, + past_key_values=outputs.past_key_values, + hidden_states=outputs.hidden_states, + attentions=outputs.attentions, + ) + + +class Qwen3ForSequenceClassification( + GenericForSequenceClassification, Qwen3PreTrainedModel +): + pass + + +class Qwen3ForTokenClassification(GenericForTokenClassification, Qwen3PreTrainedModel): + pass + + +class Qwen3ForQuestionAnswering(GenericForQuestionAnswering, Qwen3PreTrainedModel): + base_model_prefix = ( + "transformer" # For BC, where `transformer` was used instead of `model` + ) + + +__all__ = [ + "Qwen3ForCausalLM", + "Qwen3ForQuestionAnswering", + "Qwen3PreTrainedModel", + "Qwen3Model", + "Qwen3ForSequenceClassification", + "Qwen3ForTokenClassification", +] diff --git a/pymllm/backends/qualcomm/transformers/qwen3/runner.py b/pymllm/backends/qualcomm/transformers/qwen3/runner.py new file mode 100644 index 000000000..c2aed54c8 --- /dev/null +++ b/pymllm/backends/qualcomm/transformers/qwen3/runner.py @@ -0,0 +1,155 @@ +import torch +from tqdm import tqdm +from modelscope.msdatasets import MsDataset +from transformers import AutoTokenizer +from 
pymllm.backends.qualcomm.transformers.core.qdq import ActivationQDQ +from pymllm.backends.qualcomm.transformers.core.rms_norm import QRMSNorm +from pymllm.backends.qualcomm.transformers.core.qlinear import ( + QLinearLPBQ, + QLinearW8A16_PerChannelSym, +) +from pymllm.backends.qualcomm.transformers.qwen3.modeling_qwen3 import Qwen3ForCausalLM + + +def freeze_qwen3_rmsnorm_weight(m): + if isinstance(m, QRMSNorm): + m.freeze_weight() + + +def freeze_qwen3_linear_weight(m): + if isinstance(m, QLinearLPBQ) or isinstance(m, QLinearW8A16_PerChannelSym): + m.freeze_weight() + + +def disable_qdq_observer(m): + if isinstance(m, ActivationQDQ): + m.disable_observer() + + +def enable_qdq_observer(m): + if isinstance(m, ActivationQDQ): + m.enable_observer() + + +class Qwen3Quantizer: + def __init__(self, model_path: str, mllm_qualcomm_max_length=2048): + self.tokenizer = AutoTokenizer.from_pretrained(model_path) + self.model = Qwen3ForCausalLM.from_pretrained( + model_path, + attn_implementation="eager", + ) + self.mllm_qualcomm_max_length = mllm_qualcomm_max_length + self.model.mllm_qualcomm_max_length = mllm_qualcomm_max_length + + # PTQ All Weights. + self.model.apply(freeze_qwen3_rmsnorm_weight) + self.model.apply(freeze_qwen3_linear_weight) + print("All PTQ weights preparation done.") + + def freeze_activation(self): + self.model.apply(disable_qdq_observer) + + def enable_activation_update(self): + self.model.apply(enable_qdq_observer) + + def infer(self, prompt: str): + messages = [{"role": "user", "content": prompt}] + text = self.tokenizer.apply_chat_template( + messages, + tokenize=False, + add_generation_prompt=True, + enable_thinking=False, # Switches between thinking and non-thinking modes. Default is True. 
+ ) + model_inputs = self.tokenizer([text], return_tensors="pt").to(self.model.device) + + # conduct text completion + generated_ids = self.model.generate( + **model_inputs, + max_new_tokens=self.mllm_qualcomm_max_length + - len(model_inputs.input_ids[0]) + - 1, + do_sample=False, + temperature=None, + top_p=None, + top_k=None, + ) + output_ids = generated_ids[0][len(model_inputs.input_ids[0]) :].tolist() + + # parsing thinking content + try: + # rindex finding 151668 () + index = len(output_ids) - output_ids[::-1].index(151668) + except ValueError: + index = 0 + + thinking_content = self.tokenizer.decode( + output_ids[:index], skip_special_tokens=True + ).strip("\n") + content = self.tokenizer.decode( + output_ids[index:], skip_special_tokens=True + ).strip("\n") + + print("thinking content:", thinking_content) + print("content:", content) + + def calibrate(self, num_samples=64, max_seq_length=512): + """ + Perform calibration using Wikipedia dataset (PTQ) + :param num_samples: Number of samples for calibration + :param max_seq_length: Maximum length for each sample (not exceeding mllm_qualcomm_max_length) + """ + print( + f"Starting calibration, samples: {num_samples}, max length: {max_seq_length}" + ) + + # 1. Enable QDQ Observer for activation values + self.enable_activation_update() + self.model.eval() + + # 2. Load Wikipedia dataset (English version example) + # Use streaming=True to download and process on the fly, without downloading the full几十G dataset + dataset = MsDataset.load( + "modelscope/wikitext", + subset_name="wikitext-103-v1", + split="train", + trust_remote_code=True, + ) + + # 3. 
Execute forward pass (Prefill stage) + samples_processed = 0 + + # Ensure no gradient calculation during inference + with torch.no_grad(): + pbar = tqdm(total=num_samples, desc="Calibrating") + for entry in dataset: + if samples_processed >= num_samples: + break + + messages = [{"role": "user", "content": entry["text"]}] + text = self.tokenizer.apply_chat_template( + messages, + tokenize=False, + add_generation_prompt=True, + enable_thinking=False, # Switches between thinking and non-thinking modes. Default is True. + ) + model_inputs = self.tokenizer([text], return_tensors="pt").to( + self.model.device + ) + + # Only need Prefill stage: directly call forward + # This will trigger observer update statistics in ActivationQDQ + self.model.generate( + **model_inputs, + max_new_tokens=1, + do_sample=False, + temperature=None, + top_p=None, + top_k=None, + ) + + samples_processed += 1 + pbar.update(1) + + # 4. Close Observer, freeze calibrated quantization parameters + self.freeze_activation() + print("\nCalibration completed, activation quantization parameters frozen.") diff --git a/pymllm/backends/qualcomm/transformers/static_qwen3.py b/pymllm/backends/qualcomm/transformers/static_qwen3.py index becbe9f48..186e312ca 100644 --- a/pymllm/backends/qualcomm/transformers/static_qwen3.py +++ b/pymllm/backends/qualcomm/transformers/static_qwen3.py @@ -7,37 +7,7 @@ QLinearLPBQ, QLinearW8A16_PerChannelSym_PerTensorSym, ) - - -# This settings below is for Qwen1.7B -class Qwen3Config: - def __init__(self): - self.attention_bias = False - self.attention_dropout = 0.0 - self.bos_token_id = 151643 - self.eos_token_id = 151645 - self.head_dim = 128 - self.hidden_act = "silu" - self.hidden_size = 2048 - self.initializer_range = 0.02 - self.intermediate_size = 6144 - self.max_position_embeddings = 40960 - self.max_window_layers = 28 - self.model_type = "qwen3" - self.num_attention_heads = 16 - self.num_hidden_layers = 28 - self.num_key_value_heads = 8 - self.pad_token_id = 151643 - 
self.rms_norm_eps = 1e-06 - self.rope_scaling = None - self.rope_theta = 1000000 - self.sliding_window = None - self.tie_word_embeddings = True - self.torch_dtype = "bfloat16" - self.transformers_version = "4.51.0" - self.use_cache = True - self.use_sliding_window = False - self.vocab_size = 151936 +from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer def generate_rope_cache( @@ -153,6 +123,13 @@ def __init__(self, config): self.qdq_act = QDQ_OP["A16-PerTensor"]() self.qdq_middle = QDQ_OP["A16-PerTensor"]() + def freeze_observer(self): + for name, value in self.__dict__.items(): + if isinstance(value, QDQ_OP["A16-PerTensor"]) or isinstance( + value, QDQ_OP["A8-PerTensor"] + ): + value.disable_observer() + def forward(self, x): """ input: @@ -163,7 +140,7 @@ def forward(self, x): x = self.qdq_x(x) up_result = self.qdq_up_result(self.up_proj(x)) gate_result = self.qdq_gate_result(self.gate_proj(x)) - up_result = self.qdq_act(self.act_fn(up_result)) + gate_result = self.qdq_act(self.act_fn(gate_result)) o = self.qdq_middle(gate_result * up_result) o = self.down_proj(o) return o @@ -236,6 +213,13 @@ def __init__(self, config, layer_idx: int): self.k_cache = None self.v_cache = None + def freeze_observer(self): + for name, value in self.__dict__.items(): + if isinstance(value, QDQ_OP["A16-PerTensor"]) or isinstance( + value, QDQ_OP["A8-PerTensor"] + ): + value.disable_observer() + def forward( self, hidden_states: torch.Tensor, @@ -258,6 +242,7 @@ def forward( query_states = ( self.q_proj(quantized_hidden_states).view(hidden_shape).transpose(1, 2) ) + key_states = ( self.k_proj(quantized_hidden_states).view(hidden_shape).transpose(1, 2) ) @@ -289,6 +274,9 @@ def forward( + self.qdq_rope_5(rot_k * sin_embedding) ) + key_states = repeat_kv(key_states, self.num_key_value_groups) + value_states = repeat_kv(value_states, self.num_key_value_groups) + key_states = self.qdq_4(key_states) # [B, H, D, S] key_states = key_states.transpose(2, 3) @@ -307,12 
+295,11 @@ def forward( self.k_cache = key_states self.v_cache = value_states - key_states = repeat_kv(key_states, self.num_key_value_groups) - value_states = repeat_kv(value_states, self.num_key_value_groups) - attn = query_states @ key_states attn = self.qdq_5(attn) - attn = attn / self.qdq_6(torch.ones(1, dtype=torch.bfloat16) * self.scaling) + attn = attn / self.qdq_6( + torch.ones(1, dtype=torch.bfloat16, device=attn.device) * self.scaling + ) attn = self.qdq_7(attn) attn_min = torch.amin(attn, dim=-1, keepdim=True) attn_min = self.qdq_8(attn_min) @@ -320,12 +307,17 @@ def forward( attn_vv = self.qdq_9(attn_vv) attn = torch.where(causal_mask == 0, attn, attn_vv) attn = self.qdq_10(attn) - attn = F.softmax(attn, -1) + attn = F.softmax(attn.to(torch.float32), -1).to(torch.bfloat16) + print(attn) + exit(0) attn = self.qdq_11(attn) y = attn @ value_states y = self.qdq_12(y) y = y.transpose(1, 2).reshape(bsz, seq_len, -1) y = self.o_proj(y) + print(y.shape) + print(y) + exit(0) return y @@ -345,6 +337,17 @@ def __init__(self, config, layer_idx: int): self.qdq_2 = QDQ_OP["A16-PerTensor"]() self.qdq_3 = QDQ_OP["A16-PerTensor"]() + def freeze_observer(self): + self.mlp.freeze_observer() + self.self_attn.freeze_observer() + self.input_layernorm.freeze_observer() + self.post_attention_layernorm.freeze_observer() + for name, value in self.__dict__.items(): + if isinstance(value, QDQ_OP["A16-PerTensor"]) or isinstance( + value, QDQ_OP["A8-PerTensor"] + ): + value.disable_observer() + def forward( self, hidden_states: torch.Tensor, @@ -396,8 +399,18 @@ def __init__(self, config): self.norm = QRMSNorm(config.hidden_size, eps=config.rms_norm_eps) self.qdq_0 = QDQ_OP["A16-PerTensor"]() + def freeze_observer(self): + self.norm.freeze_observer() + for item in self.layers: + item.freeze_observer() + for name, value in self.__dict__.items(): + if isinstance(value, QDQ_OP["A16-PerTensor"]) or isinstance( + value, QDQ_OP["A8-PerTensor"] + ): + value.disable_observer() + def 
forward(self, input_ids, sin, cos, causal_mask): - inputs_embeds = self.embed_tokens(input_ids) + inputs_embeds = self.embed_tokens(input_ids).to(torch.bfloat16) hidden_states = inputs_embeds for decoder_layer in self.layers[: self.config.num_hidden_layers]: @@ -407,8 +420,9 @@ def forward(self, input_ids, sin, cos, causal_mask): return hidden_states -class Qwen3ForCausalLM: +class Qwen3ForCausalLM(nn.Module): def __init__(self, config): + super().__init__() self.config = config self.model = Qwen3Model(config) self.vocab_size = config.vocab_size @@ -423,6 +437,22 @@ def __init__(self, config): self.register_buffer("sin", None) self.register_buffer("cos", None) + def freeze_observer(self): + self.model.freeze_observer() + for name, value in self.__dict__.items(): + if isinstance(value, QDQ_OP["A16-PerTensor"]) or isinstance( + value, QDQ_OP["A8-PerTensor"] + ): + value.disable_observer() + + def disable_fakequant(self): + # self.model.disable_fakequant() + for name, value in self.__dict__.items(): + if isinstance(value, QDQ_OP["A16-PerTensor"]) or isinstance( + value, QDQ_OP["A8-PerTensor"] + ): + value.disable_fakequant() + def forward( self, input_ids, @@ -466,18 +496,134 @@ def forward( logits = self.lm_head(self.qdq_0(out)) return logits - def _update_kv_cache_by_copy(self): - pass - def _freeze_observer(self): - pass +class Qwen3Quantizer: + def __init__(self): + # Other stuff + self.tokenizer: AutoTokenizer = None + self.model: Qwen3ForCausalLM = None + self.config: AutoConfig = None + + def load_from_hf(self, model_path: str, verbose: bool = False): + self.config = AutoConfig.from_pretrained(model_path) + state_dict = AutoModelForCausalLM.from_pretrained(model_path).state_dict() + self.model = Qwen3ForCausalLM(self.config) + + # Check if all original weight is in state_dict + model_keys = set(self.model.state_dict().keys()) + loaded_keys = set(state_dict.keys()) + + # 1. 
Keys present in model but missing in state_dict + missing_keys = model_keys - loaded_keys + if missing_keys and verbose: + print( + f"\n⚠️ Keys present in model but missing in state_dict ({len(missing_keys)} keys):" + ) + for k in sorted(missing_keys): + print(f" - {k}") + + # 2. Keys present in state_dict but unexpected in model + unexpected_keys = loaded_keys - model_keys + if unexpected_keys: + print( + f"\n⚠️ Keys present in state_dict but unexpected in model ({len(unexpected_keys)} keys):" + ) + for k in sorted(unexpected_keys): + print(f" - {k}") - def infer(self, model_path: str, prompt: str, max_length) -> str: - pass + self.model.load_state_dict(state_dict, strict=False) + self.model.cuda() + self.tokenizer = AutoTokenizer.from_pretrained(model_path) - def calibrate(self, model_path: str, dataset_path: str): + def infer(self, prompt: str, enable_fake_quant: bool = True) -> str: """ - calibrate Only on PREFILL stage !!! + Generate response for the given prompt. + + Args: + prompt: Input text prompt + + Returns: + Generated text response + """ + # Tokenize the input prompt + self.model.freeze_observer() + if not enable_fake_quant: + self.model.disable_fakequant() + if hasattr(self.tokenizer, "chat_template") and self.tokenizer.chat_template: + formatted_prompt = self.tokenizer.apply_chat_template( + [{"role": "user", "content": prompt}], + tokenize=False, + add_generation_prompt=True, + enable_thinking=False, + ) + else: + formatted_prompt = prompt + inputs = self.tokenizer(formatted_prompt, return_tensors="pt").to("cuda") + input_ids = inputs["input_ids"] + seq_len = input_ids.shape[1] + + # Initialize position_ids + position_ids = torch.arange(seq_len, dtype=torch.long, device=input_ids.device) + position_ids = position_ids.unsqueeze(0) # Add batch dimension + + # Get max_length from config or use a default value + max_length = getattr(self.config, "max_position_embeddings", 2048) + + # TODO remove this + max_length = 8 + + # Prefill stage: process the 
prompt and build KV cache + with torch.no_grad(): + logits = self.model( + input_ids=input_ids, + position_ids=position_ids, + max_length=2048, + ) + + # Get the last token from prefill as the first generated token + next_token = logits[:, -1, :].argmax(dim=-1, keepdim=True) + generated_tokens = next_token.clone() + + # Decode stage: generate tokens one by one using KV cache + while generated_tokens.shape[1] < max_length: + # Update position_ids for the new token + new_position_id = position_ids[:, -1] + 1 + position_ids = new_position_id.unsqueeze(0) + + with torch.no_grad(): + logits = self.model( + input_ids=next_token, + position_ids=position_ids, + max_length=max_length, + ) + + # Get next token (greedy decoding) + next_token = logits[:, -1, :].argmax(dim=-1, keepdim=True) + + # Append generated token + generated_tokens = torch.cat([generated_tokens, next_token], dim=1) + + # Stop if EOS token is generated + if next_token.item() == self.tokenizer.eos_token_id: + break + + # Decode generated tokens to text + generated_text = self.tokenizer.decode( + generated_tokens[0], skip_special_tokens=True + ) + + return generated_text + + def calibrate(self, dataset_path: str): + """ + Calibrate Only on PREFILL stage !!! """ # Call infer after calibrate done. 
pass + + +if __name__ == "__main__": + quantizer = Qwen3Quantizer() + quantizer.load_from_hf("/mnt/user-ssd/shared_models/Qwen3-1.7B/") + result = quantizer.infer("hello") + print(result) diff --git a/pymllm/backends/qualcomm/transformers/train.py b/pymllm/backends/qualcomm/transformers/train.py index e69de29bb..a36416a44 100644 --- a/pymllm/backends/qualcomm/transformers/train.py +++ b/pymllm/backends/qualcomm/transformers/train.py @@ -0,0 +1,6 @@ +from pymllm.backends.qualcomm.transformers.qwen3.runner import Qwen3Quantizer + +if __name__ == "__main__": + m = Qwen3Quantizer() + m.calibrate() + m.infer("简述中国断代史") diff --git a/requirements-qnn-aot.txt b/requirements-qnn-aot.txt new file mode 100644 index 000000000..f3f435c9f --- /dev/null +++ b/requirements-qnn-aot.txt @@ -0,0 +1 @@ +addict==2.4.0 From 9ea8f9de0d2fd45c5bdaa611c0d286b769c11361 Mon Sep 17 00:00:00 2001 From: chenghuaWang <2923277184@qq.com> Date: Sat, 3 Jan 2026 05:57:28 +0000 Subject: [PATCH 05/13] feat: update AOT Qualcomm Qwen3 --- .../backends/qualcomm/transformers/README.md | 7 +- .../transformers/qwen3/modeling_qwen3.py | 28 +- .../qualcomm/transformers/qwen3/runner.py | 17 +- .../qualcomm/transformers/qwen3/train.py | 37 ++ .../qualcomm/transformers/static_qwen3.py | 629 ------------------ .../backends/qualcomm/transformers/train.py | 6 - requirements-qnn-aot.txt | 3 + 7 files changed, 77 insertions(+), 650 deletions(-) create mode 100644 pymllm/backends/qualcomm/transformers/qwen3/train.py delete mode 100644 pymllm/backends/qualcomm/transformers/static_qwen3.py delete mode 100644 pymllm/backends/qualcomm/transformers/train.py diff --git a/pymllm/backends/qualcomm/transformers/README.md b/pymllm/backends/qualcomm/transformers/README.md index 256c60ece..9d677a86f 100644 --- a/pymllm/backends/qualcomm/transformers/README.md +++ b/pymllm/backends/qualcomm/transformers/README.md @@ -1,3 +1,8 @@ - # Transformers Quantization for Qualcomm Backend +## Qwen3 + +```shell +cd ./qwen3 +python train.py 
--model_path "/your/model/path/" --max_length 2048 --num_samples 128 --infer_text "为什么伟大不能被计划" +``` diff --git a/pymllm/backends/qualcomm/transformers/qwen3/modeling_qwen3.py b/pymllm/backends/qualcomm/transformers/qwen3/modeling_qwen3.py index 5918b5d85..1fe04f14d 100644 --- a/pymllm/backends/qualcomm/transformers/qwen3/modeling_qwen3.py +++ b/pymllm/backends/qualcomm/transformers/qwen3/modeling_qwen3.py @@ -202,9 +202,11 @@ def __init__(self, config: Qwen3Config, layer_idx: int): self.k_rope_add_0_output_qdq = ActivationQDQ(bits=16) self.k_cast_to_int8_qdq = ActivationQDQ(bits=8) self.v_cast_to_int8_qdq = ActivationQDQ(bits=8) + self.v_cast_to_int16_qdq = ActivationQDQ(bits=16) self.qk_matmul_output_qdq = ActivationQDQ(bits=16) self.scaling_qdq = ActivationQDQ(bits=16) self.reduce_min_output_qdq = ActivationQDQ(bits=16) + self.mul_0_output_qdq = ActivationQDQ(bits=16) self.minus_0_output_qdq = ActivationQDQ(bits=16) self.softmax_output_qdq = ActivationQDQ(bits=16) self.attn_value_matmul_output_qdq = ActivationQDQ(bits=16) @@ -248,7 +250,7 @@ def forward( ) key_states = self.k_cast_to_int8_qdq(key_states) - value_states = self.v_cast_to_int8_qdq(value_states) + value_states = self.v_cast_to_int8_qdq(self.v_cast_to_int16_qdq(value_states)) if past_key_values is not None: # sin and cos are specific to RoPE models; cache_position needed for the static cache @@ -260,11 +262,14 @@ def forward( key_states = repeat_kv(key_states, self.num_key_value_groups) value_states = repeat_kv(value_states, self.num_key_value_groups) - attn_weights = self.qk_matmul_output_qdq( - torch.matmul(query_states, key_states.transpose(2, 3)) - ) * self.scaling_qdq( - torch.ones(1, dtype=torch.bfloat16, device=value_states.device) - * self.scaling + attn_weights = self.mul_0_output_qdq( + self.qk_matmul_output_qdq( + torch.matmul(query_states, key_states.transpose(2, 3)) + ) + * self.scaling_qdq( + torch.ones(1, dtype=torch.bfloat16, device=value_states.device) + * self.scaling + ) ) attn_min 
= self.reduce_min_output_qdq( @@ -444,6 +449,7 @@ def __init__(self, config: Qwen3Config): self.register_buffer("mllm_max_cos_embedding", None) self.sin_embedding_input_qdq = ActivationQDQ(bits=16) self.cos_embedding_input_qdq = ActivationQDQ(bits=16) + self.norm_input_qdq = ActivationQDQ(bits=16) # Initialize weights and apply final processing self.post_init() @@ -560,7 +566,7 @@ def forward( **kwargs, ) - hidden_states = self.norm(hidden_states) + hidden_states = self.norm(self.norm_input_qdq(hidden_states)) return BaseModelOutputWithPast( last_hidden_state=hidden_states, past_key_values=past_key_values if use_cache else None, @@ -582,6 +588,9 @@ def __init__(self, config): ) self.mllm_qualcomm_max_length = None + self.lm_head_input_qdq = ActivationQDQ(bits=16) + self.lm_head_output_qdq = ActivationQDQ(bits=16) + # Initialize weights and apply final processing self.post_init() @@ -641,7 +650,10 @@ def forward( if isinstance(logits_to_keep, int) else logits_to_keep ) - logits = self.lm_head(hidden_states[:, slice_indices, :]) + logits = self.lm_head( + self.lm_head_input_qdq(hidden_states[:, slice_indices, :]) + ) + logits = self.lm_head_output_qdq(logits) loss = None if labels is not None: diff --git a/pymllm/backends/qualcomm/transformers/qwen3/runner.py b/pymllm/backends/qualcomm/transformers/qwen3/runner.py index c2aed54c8..0b4462f2b 100644 --- a/pymllm/backends/qualcomm/transformers/qwen3/runner.py +++ b/pymllm/backends/qualcomm/transformers/qwen3/runner.py @@ -65,9 +65,7 @@ def infer(self, prompt: str): # conduct text completion generated_ids = self.model.generate( **model_inputs, - max_new_tokens=self.mllm_qualcomm_max_length - - len(model_inputs.input_ids[0]) - - 1, + max_new_tokens=128 - len(model_inputs.input_ids[0]) - 1, do_sample=False, temperature=None, top_p=None, @@ -125,6 +123,9 @@ def calibrate(self, num_samples=64, max_seq_length=512): if samples_processed >= num_samples: break + if len(entry["text"].strip()) < 1024: + continue + messages = 
[{"role": "user", "content": entry["text"]}] text = self.tokenizer.apply_chat_template( messages, @@ -132,9 +133,13 @@ def calibrate(self, num_samples=64, max_seq_length=512): add_generation_prompt=True, enable_thinking=False, # Switches between thinking and non-thinking modes. Default is True. ) - model_inputs = self.tokenizer([text], return_tensors="pt").to( - self.model.device - ) + model_inputs = self.tokenizer( + [text], + return_tensors="pt", + max_length=max_seq_length, + truncation=True, + padding=False, + ).to(self.model.device) # Only need Prefill stage: directly call forward # This will trigger observer update statistics in ActivationQDQ diff --git a/pymllm/backends/qualcomm/transformers/qwen3/train.py b/pymllm/backends/qualcomm/transformers/qwen3/train.py new file mode 100644 index 000000000..81e452903 --- /dev/null +++ b/pymllm/backends/qualcomm/transformers/qwen3/train.py @@ -0,0 +1,37 @@ +import argparse +from pymllm.backends.qualcomm.transformers.qwen3.runner import Qwen3Quantizer + + +def main(): + parser = argparse.ArgumentParser(description="Qwen3 Quantizer for Qualcomm backend") + parser.add_argument( + "--model_path", + type=str, + default="Qwen3-1.7B", + help="Path to the Qwen3 model directory", + ) + parser.add_argument( + "--max_length", + type=int, + default=2048, + help="Maximum sequence length for quantization", + ) + parser.add_argument( + "--num_samples", type=int, default=128, help="Number of samples for calibration" + ) + parser.add_argument( + "--infer_text", + type=str, + default="为什么伟大不能被计划", + help="Text to run inference on", + ) + + args = parser.parse_args() + + m = Qwen3Quantizer(args.model_path, mllm_qualcomm_max_length=args.max_length) + m.calibrate(num_samples=args.num_samples, max_seq_length=args.max_length) + m.infer(args.infer_text) + + +if __name__ == "__main__": + main() diff --git a/pymllm/backends/qualcomm/transformers/static_qwen3.py b/pymllm/backends/qualcomm/transformers/static_qwen3.py deleted file mode 100644 
index 186e312ca..000000000 --- a/pymllm/backends/qualcomm/transformers/static_qwen3.py +++ /dev/null @@ -1,629 +0,0 @@ -import torch -from torch import nn -from torch.nn import functional as F -from pymllm.backends.qualcomm.transformers.core.qdq import QDQ_OP -from pymllm.backends.qualcomm.transformers.core.rms_norm import QRMSNorm -from pymllm.backends.qualcomm.transformers.core.qlinear import ( - QLinearLPBQ, - QLinearW8A16_PerChannelSym_PerTensorSym, -) -from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer - - -def generate_rope_cache( - max_length: int, - head_dim: int, - rope_theta: float, - dtype=torch.bfloat16, - device="cpu", -): - """ - Generate RoPE (Rotary Position Embedding) cache for given max_length. - - Args: - max_length: Maximum sequence length - head_dim: Dimension of each attention head - rope_theta: RoPE theta parameter (frequency base) - dtype: Data type for the embeddings - device: Device to place the embeddings on - - Returns: - tuple: (cos, sin) embeddings of shape [max_length, head_dim] - """ - inv_freq = 1.0 / ( - rope_theta - ** (torch.arange(0, head_dim, 2, dtype=torch.float32, device=device) / head_dim) - ) - t = torch.arange(max_length, dtype=torch.float32, device=device) - freqs = torch.einsum("i,j->ij", t, inv_freq) - emb = torch.cat((freqs, freqs), dim=-1) - cos = emb.cos().to(dtype) - sin = emb.sin().to(dtype) - return cos, sin - - -def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor: - """ - This is the equivalent of torch.repeat_interleave(x, dim=1, repeats=n_rep). 
The hidden states go from (batch, - num_key_value_heads, seqlen, head_dim) to (batch, num_attention_heads, seqlen, head_dim) - """ - batch, num_key_value_heads, slen, head_dim = hidden_states.shape - if n_rep == 1: - return hidden_states - hidden_states = hidden_states[:, :, None, :, :].expand( - batch, num_key_value_heads, n_rep, slen, head_dim - ) - return hidden_states.reshape(batch, num_key_value_heads * n_rep, slen, head_dim) - - -def rotate_half(x): - """Rotates half the hidden dims of the input.""" - x1 = x[..., : x.shape[-1] // 2] - x2 = x[..., x.shape[-1] // 2 :] - return torch.cat((-x2, x1), dim=-1) - - -def apply_rotary_pos_emb(q, k, cos, sin, position_ids=None, unsqueeze_dim=1): - """Applies Rotary Position Embedding to the query and key tensors. - - Args: - q (`torch.Tensor`): The query tensor. - k (`torch.Tensor`): The key tensor. - cos (`torch.Tensor`): The cosine part of the rotary embedding. - sin (`torch.Tensor`): The sine part of the rotary embedding. - position_ids (`torch.Tensor`, *optional*): - Deprecated and unused. - unsqueeze_dim (`int`, *optional*, defaults to 1): - The 'unsqueeze_dim' argument specifies the dimension along which to unsqueeze cos[position_ids] and - sin[position_ids] so that they can be properly broadcasted to the dimensions of q and k. For example, note - that cos[position_ids] and sin[position_ids] have the shape [batch_size, seq_len, head_dim]. Then, if q and - k have the shape [batch_size, heads, seq_len, head_dim], then setting unsqueeze_dim=1 makes - cos[position_ids] and sin[position_ids] broadcastable to the shapes of q and k. Similarly, if q and k have - the shape [batch_size, seq_len, heads, head_dim], then set unsqueeze_dim=2. - Returns: - `tuple(torch.Tensor)` comprising of the query and key tensors rotated using the Rotary Position Embedding. 
- """ - cos = cos.unsqueeze(unsqueeze_dim) - sin = sin.unsqueeze(unsqueeze_dim) - q_embed = (q * cos) + (rotate_half(q) * sin) - k_embed = (k * cos) + (rotate_half(k) * sin) - return q_embed, k_embed - - -class Qwen3MLP(nn.Module): - def __init__(self, config): - super().__init__() - self.config = config - self.hidden_size = config.hidden_size - self.intermediate_size = config.intermediate_size - self.gate_proj = QLinearLPBQ( - self.hidden_size, - self.intermediate_size, - bias=False, - block_size=32, - ) - self.up_proj = QLinearLPBQ( - self.hidden_size, - self.intermediate_size, - bias=False, - block_size=32, - ) - self.down_proj = QLinearLPBQ( - self.intermediate_size, - self.hidden_size, - bias=False, - block_size=32, - ) - self.act_fn = nn.SiLU() - - # QDQ - self.qdq_x = QDQ_OP["A16-PerTensor"]() - self.qdq_up_result = QDQ_OP["A16-PerTensor"]() - self.qdq_gate_result = QDQ_OP["A16-PerTensor"]() - self.qdq_act = QDQ_OP["A16-PerTensor"]() - self.qdq_middle = QDQ_OP["A16-PerTensor"]() - - def freeze_observer(self): - for name, value in self.__dict__.items(): - if isinstance(value, QDQ_OP["A16-PerTensor"]) or isinstance( - value, QDQ_OP["A8-PerTensor"] - ): - value.disable_observer() - - def forward(self, x): - """ - input: - x: bf16, w/o fakequant - output: - o: bf16, w/o fakequant - """ - x = self.qdq_x(x) - up_result = self.qdq_up_result(self.up_proj(x)) - gate_result = self.qdq_gate_result(self.gate_proj(x)) - gate_result = self.qdq_act(self.act_fn(gate_result)) - o = self.qdq_middle(gate_result * up_result) - o = self.down_proj(o) - return o - - -class Qwen3Attention(nn.Module): - def __init__(self, config, layer_idx: int): - super().__init__() - self.config = config - self.layer_idx = layer_idx - self.head_dim = getattr( - config, "head_dim", config.hidden_size // config.num_attention_heads - ) - self.num_key_value_groups = ( - config.num_attention_heads // config.num_key_value_heads - ) - self.scaling = self.head_dim**-0.5 - self.q_proj = QLinearLPBQ( - 
config.hidden_size, - config.num_attention_heads * self.head_dim, - bias=False, - block_size=32, - ) - self.k_proj = QLinearLPBQ( - config.hidden_size, - config.num_key_value_heads * self.head_dim, - bias=False, - block_size=32, - ) - self.v_proj = QLinearLPBQ( - config.hidden_size, - config.num_key_value_heads * self.head_dim, - bias=False, - block_size=32, - ) - self.o_proj = QLinearLPBQ( - config.num_attention_heads * self.head_dim, - config.hidden_size, - bias=False, - block_size=32, - ) - self.q_norm = QRMSNorm(self.head_dim, eps=config.rms_norm_eps) - self.k_norm = QRMSNorm(self.head_dim, eps=config.rms_norm_eps) - - # QDQ - self.qdq_hidden_states = QDQ_OP["A16-PerTensor"]() - self.qdq_0 = QDQ_OP["A16-PerTensor"]() - self.qdq_1 = QDQ_OP["A16-PerTensor"]() - self.qdq_2 = QDQ_OP["A16-PerTensor"]() - self.qdq_3 = QDQ_OP["A16-PerTensor"]() - self.qdq_4 = QDQ_OP["A8-PerTensor"]() - self.qdq_5 = QDQ_OP["A16-PerTensor"]() - self.qdq_6 = QDQ_OP["A16-PerTensor"]() - self.qdq_7 = QDQ_OP["A16-PerTensor"]() - self.qdq_8 = QDQ_OP["A16-PerTensor"]() - self.qdq_9 = QDQ_OP["A16-PerTensor"]() - self.qdq_10 = QDQ_OP["A16-PerTensor"]() - self.qdq_11 = QDQ_OP["A16-PerTensor"]() - self.qdq_12 = QDQ_OP["A16-PerTensor"]() - self.qdq_13 = QDQ_OP["A16-PerTensor"]() - self.qdq_14 = QDQ_OP["A8-PerTensor"]() - - self.qdq_rope_0 = QDQ_OP["A16-PerTensor"]() - self.qdq_rope_1 = QDQ_OP["A16-PerTensor"]() - self.qdq_rope_2 = QDQ_OP["A16-PerTensor"]() - self.qdq_rope_3 = QDQ_OP["A16-PerTensor"]() - self.qdq_rope_4 = QDQ_OP["A16-PerTensor"]() - self.qdq_rope_5 = QDQ_OP["A16-PerTensor"]() - - self.k_cache = None - self.v_cache = None - - def freeze_observer(self): - for name, value in self.__dict__.items(): - if isinstance(value, QDQ_OP["A16-PerTensor"]) or isinstance( - value, QDQ_OP["A8-PerTensor"] - ): - value.disable_observer() - - def forward( - self, - hidden_states: torch.Tensor, - sin: torch.Tensor, - cos: torch.Tensor, - causal_mask: torch.Tensor, - ): - """ - input: - hidden_states: 
bf16, w/o fakequant - output: - o: bf16, w/o fakequant - """ - bsz, seq_len, _ = hidden_states.shape - input_shape = hidden_states.shape[:-1] - hidden_shape = (*input_shape, -1, self.head_dim) - quantized_hidden_states = self.qdq_hidden_states(hidden_states) - - # [B, H, S, D] - query_states = ( - self.q_proj(quantized_hidden_states).view(hidden_shape).transpose(1, 2) - ) - - key_states = ( - self.k_proj(quantized_hidden_states).view(hidden_shape).transpose(1, 2) - ) - value_states = ( - self.v_proj(quantized_hidden_states).view(hidden_shape).transpose(1, 2) - ) - - query_states = self.q_norm(self.qdq_0(query_states)) - query_states = self.qdq_1(query_states) - - key_states = self.k_norm(self.qdq_2(key_states)) - key_states = self.qdq_3(key_states) - - # ROPE Here - # cos = cos.unsqueeze(unsqueeze_dim) - # sin = sin.unsqueeze(unsqueeze_dim) - # q_embed = (q * cos) + (rotate_half(q) * sin) - # k_embed = (k * cos) + (rotate_half(k) * sin) - cos_embedding = cos.unsqueeze(1) - sin_embedding = sin.unsqueeze(1) - rot_q = rotate_half(query_states) - rot_k = rotate_half(key_states) - query_states = self.qdq_rope_0( - self.qdq_rope_1(query_states * cos_embedding) - + self.qdq_rope_2(rot_q * sin_embedding) - ) - key_states = self.qdq_rope_3( - self.qdq_rope_4(key_states * cos_embedding) - + self.qdq_rope_5(rot_k * sin_embedding) - ) - - key_states = repeat_kv(key_states, self.num_key_value_groups) - value_states = repeat_kv(value_states, self.num_key_value_groups) - - key_states = self.qdq_4(key_states) - # [B, H, D, S] - key_states = key_states.transpose(2, 3) - # [B, H, S, D] - value_states = self.qdq_14(self.qdq_13(value_states)) - - # KV Cache Here - if seq_len > 1 and self.k_cache is not None and self.v_cache is not None: - self.k_cache = None - self.v_cache = None - - if seq_len == 1: - self.k_cache = torch.cat([self.k_cache, key_states], dim=-1) - self.v_cache = torch.cat([self.v_cache, value_states], dim=2) - else: - self.k_cache = key_states - self.v_cache = 
value_states - - attn = query_states @ key_states - attn = self.qdq_5(attn) - attn = attn / self.qdq_6( - torch.ones(1, dtype=torch.bfloat16, device=attn.device) * self.scaling - ) - attn = self.qdq_7(attn) - attn_min = torch.amin(attn, dim=-1, keepdim=True) - attn_min = self.qdq_8(attn_min) - attn_vv = attn_min - 20 - attn_vv = self.qdq_9(attn_vv) - attn = torch.where(causal_mask == 0, attn, attn_vv) - attn = self.qdq_10(attn) - attn = F.softmax(attn.to(torch.float32), -1).to(torch.bfloat16) - print(attn) - exit(0) - attn = self.qdq_11(attn) - y = attn @ value_states - y = self.qdq_12(y) - y = y.transpose(1, 2).reshape(bsz, seq_len, -1) - y = self.o_proj(y) - print(y.shape) - print(y) - exit(0) - return y - - -class Qwen3DecodeLayer(nn.Module): - def __init__(self, config, layer_idx: int): - super().__init__() - self.hidden_size = config.hidden_size - self.self_attn = Qwen3Attention(config=config, layer_idx=layer_idx) - self.mlp = Qwen3MLP(config) - self.input_layernorm = QRMSNorm(config.hidden_size, eps=config.rms_norm_eps) - self.post_attention_layernorm = QRMSNorm( - config.hidden_size, eps=config.rms_norm_eps - ) - - self.qdq_0 = QDQ_OP["A16-PerTensor"]() - self.qdq_1 = QDQ_OP["A16-PerTensor"]() - self.qdq_2 = QDQ_OP["A16-PerTensor"]() - self.qdq_3 = QDQ_OP["A16-PerTensor"]() - - def freeze_observer(self): - self.mlp.freeze_observer() - self.self_attn.freeze_observer() - self.input_layernorm.freeze_observer() - self.post_attention_layernorm.freeze_observer() - for name, value in self.__dict__.items(): - if isinstance(value, QDQ_OP["A16-PerTensor"]) or isinstance( - value, QDQ_OP["A8-PerTensor"] - ): - value.disable_observer() - - def forward( - self, - hidden_states: torch.Tensor, - sin: torch.Tensor, - cos: torch.Tensor, - causal_mask: torch.Tensor, - ): - """ - inputs: - hidden_states: bf16, w/o fakequant - outputs: - hidden_states: bf16, w/o fakequant - """ - hidden_states = self.qdq_0(hidden_states) - residual = hidden_states - hidden_states = 
self.input_layernorm(hidden_states) - # Self Attention - hidden_states = self.self_attn( - hidden_states, - sin, - cos, - causal_mask, - ) - hidden_states = self.qdq_2(residual + self.qdq_1(hidden_states)) - - # Fully Connected - residual = hidden_states - hidden_states = self.post_attention_layernorm(hidden_states) - hidden_states = self.mlp(hidden_states) - hidden_states = residual + self.qdq_3(hidden_states) - return hidden_states - - -class Qwen3Model(nn.Module): - def __init__(self, config): - super().__init__() - self.config = config - self.padding_idx = config.pad_token_id - self.vocab_size = config.vocab_size - self.embed_tokens = nn.Embedding( - config.vocab_size, config.hidden_size, self.padding_idx - ) - self.layers = nn.ModuleList( - [ - Qwen3DecodeLayer(config, layer_idx) - for layer_idx in range(config.num_hidden_layers) - ] - ) - self.norm = QRMSNorm(config.hidden_size, eps=config.rms_norm_eps) - self.qdq_0 = QDQ_OP["A16-PerTensor"]() - - def freeze_observer(self): - self.norm.freeze_observer() - for item in self.layers: - item.freeze_observer() - for name, value in self.__dict__.items(): - if isinstance(value, QDQ_OP["A16-PerTensor"]) or isinstance( - value, QDQ_OP["A8-PerTensor"] - ): - value.disable_observer() - - def forward(self, input_ids, sin, cos, causal_mask): - inputs_embeds = self.embed_tokens(input_ids).to(torch.bfloat16) - hidden_states = inputs_embeds - - for decoder_layer in self.layers[: self.config.num_hidden_layers]: - hidden_states = decoder_layer(hidden_states, sin, cos, causal_mask) - - hidden_states = self.norm(self.qdq_0(hidden_states)) - return hidden_states - - -class Qwen3ForCausalLM(nn.Module): - def __init__(self, config): - super().__init__() - self.config = config - self.model = Qwen3Model(config) - self.vocab_size = config.vocab_size - self.lm_head = QLinearW8A16_PerChannelSym_PerTensorSym( - config.hidden_size, config.vocab_size, bias=False - ) - self.qdq_0 = QDQ_OP["A16-PerTensor"]() - self.qdq_1 = 
QDQ_OP["A16-PerTensor"]() - self.qdq_2 = QDQ_OP["A16-PerTensor"]() - - # Register sin and cos as buffers - self.register_buffer("sin", None) - self.register_buffer("cos", None) - - def freeze_observer(self): - self.model.freeze_observer() - for name, value in self.__dict__.items(): - if isinstance(value, QDQ_OP["A16-PerTensor"]) or isinstance( - value, QDQ_OP["A8-PerTensor"] - ): - value.disable_observer() - - def disable_fakequant(self): - # self.model.disable_fakequant() - for name, value in self.__dict__.items(): - if isinstance(value, QDQ_OP["A16-PerTensor"]) or isinstance( - value, QDQ_OP["A8-PerTensor"] - ): - value.disable_fakequant() - - def forward( - self, - input_ids, - position_ids, - max_length, - ): - _, seq_len = input_ids.shape - - # Generate causal mask based on position_ids length - # For prefill, we need a lower triangular mask - if seq_len != 1: - causal_mask = 1 - torch.tril( - torch.ones(seq_len, seq_len, dtype=torch.int8, device=input_ids.device) - ) - # [1, 1, seq_len, seq_len] - causal_mask = causal_mask.unsqueeze(0).unsqueeze(0) - else: - # [1, 1, seq_len, seq_len] - causal_mask = torch.zeros( - (1, 1, 1, seq_len), dtype=torch.int8, device=input_ids.device - ) - - # Generate or use registered RoPE embeddings - if self.sin is None or self.cos is None or self.cos.shape[0] < max_length: - cos, sin = generate_rope_cache( - max_length, - head_dim=self.config.head_dim, - rope_theta=self.config.rope_theta, - dtype=torch.bfloat16, - device=input_ids.device, - ) - # Register the generated embeddings - self.sin = self.qdq_1(sin) - self.cos = self.qdq_2(cos) - - # Slice RoPE embeddings to current sequence length - cos = self.cos[position_ids] - sin = self.sin[position_ids] - - out = self.model(input_ids, sin, cos, causal_mask) - logits = self.lm_head(self.qdq_0(out)) - return logits - - -class Qwen3Quantizer: - def __init__(self): - # Other stuff - self.tokenizer: AutoTokenizer = None - self.model: Qwen3ForCausalLM = None - self.config: AutoConfig = 
None - - def load_from_hf(self, model_path: str, verbose: bool = False): - self.config = AutoConfig.from_pretrained(model_path) - state_dict = AutoModelForCausalLM.from_pretrained(model_path).state_dict() - self.model = Qwen3ForCausalLM(self.config) - - # Check if all original weight is in state_dict - model_keys = set(self.model.state_dict().keys()) - loaded_keys = set(state_dict.keys()) - - # 1. Keys present in model but missing in state_dict - missing_keys = model_keys - loaded_keys - if missing_keys and verbose: - print( - f"\n⚠️ Keys present in model but missing in state_dict ({len(missing_keys)} keys):" - ) - for k in sorted(missing_keys): - print(f" - {k}") - - # 2. Keys present in state_dict but unexpected in model - unexpected_keys = loaded_keys - model_keys - if unexpected_keys: - print( - f"\n⚠️ Keys present in state_dict but unexpected in model ({len(unexpected_keys)} keys):" - ) - for k in sorted(unexpected_keys): - print(f" - {k}") - - self.model.load_state_dict(state_dict, strict=False) - self.model.cuda() - self.tokenizer = AutoTokenizer.from_pretrained(model_path) - - def infer(self, prompt: str, enable_fake_quant: bool = True) -> str: - """ - Generate response for the given prompt. 
- - Args: - prompt: Input text prompt - - Returns: - Generated text response - """ - # Tokenize the input prompt - self.model.freeze_observer() - if not enable_fake_quant: - self.model.disable_fakequant() - if hasattr(self.tokenizer, "chat_template") and self.tokenizer.chat_template: - formatted_prompt = self.tokenizer.apply_chat_template( - [{"role": "user", "content": prompt}], - tokenize=False, - add_generation_prompt=True, - enable_thinking=False, - ) - else: - formatted_prompt = prompt - inputs = self.tokenizer(formatted_prompt, return_tensors="pt").to("cuda") - input_ids = inputs["input_ids"] - seq_len = input_ids.shape[1] - - # Initialize position_ids - position_ids = torch.arange(seq_len, dtype=torch.long, device=input_ids.device) - position_ids = position_ids.unsqueeze(0) # Add batch dimension - - # Get max_length from config or use a default value - max_length = getattr(self.config, "max_position_embeddings", 2048) - - # TODO remove this - max_length = 8 - - # Prefill stage: process the prompt and build KV cache - with torch.no_grad(): - logits = self.model( - input_ids=input_ids, - position_ids=position_ids, - max_length=2048, - ) - - # Get the last token from prefill as the first generated token - next_token = logits[:, -1, :].argmax(dim=-1, keepdim=True) - generated_tokens = next_token.clone() - - # Decode stage: generate tokens one by one using KV cache - while generated_tokens.shape[1] < max_length: - # Update position_ids for the new token - new_position_id = position_ids[:, -1] + 1 - position_ids = new_position_id.unsqueeze(0) - - with torch.no_grad(): - logits = self.model( - input_ids=next_token, - position_ids=position_ids, - max_length=max_length, - ) - - # Get next token (greedy decoding) - next_token = logits[:, -1, :].argmax(dim=-1, keepdim=True) - - # Append generated token - generated_tokens = torch.cat([generated_tokens, next_token], dim=1) - - # Stop if EOS token is generated - if next_token.item() == self.tokenizer.eos_token_id: - break 
- - # Decode generated tokens to text - generated_text = self.tokenizer.decode( - generated_tokens[0], skip_special_tokens=True - ) - - return generated_text - - def calibrate(self, dataset_path: str): - """ - Calibrate Only on PREFILL stage !!! - """ - # Call infer after calibrate done. - pass - - -if __name__ == "__main__": - quantizer = Qwen3Quantizer() - quantizer.load_from_hf("/mnt/user-ssd/shared_models/Qwen3-1.7B/") - result = quantizer.infer("hello") - print(result) diff --git a/pymllm/backends/qualcomm/transformers/train.py b/pymllm/backends/qualcomm/transformers/train.py deleted file mode 100644 index a36416a44..000000000 --- a/pymllm/backends/qualcomm/transformers/train.py +++ /dev/null @@ -1,6 +0,0 @@ -from pymllm.backends.qualcomm.transformers.qwen3.runner import Qwen3Quantizer - -if __name__ == "__main__": - m = Qwen3Quantizer() - m.calibrate() - m.infer("简述中国断代史") diff --git a/requirements-qnn-aot.txt b/requirements-qnn-aot.txt index f3f435c9f..813462fee 100644 --- a/requirements-qnn-aot.txt +++ b/requirements-qnn-aot.txt @@ -1 +1,4 @@ addict==2.4.0 +modelscope==1.33.0 +datasets==2.21.0 +transformers==4.57.3 From a99e5c6b7e0891cff4bd1ecceca152702326b3ec Mon Sep 17 00:00:00 2001 From: chenghuaWang <2923277184@qq.com> Date: Sat, 3 Jan 2026 06:58:18 +0000 Subject: [PATCH 06/13] fix: qnn aot, qwen3 silu int16. 
--- .../qualcomm/transformers/qwen3/modeling_qwen3.py | 10 ++++++++-- pymllm/backends/qualcomm/transformers/qwen3/runner.py | 4 +++- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/pymllm/backends/qualcomm/transformers/qwen3/modeling_qwen3.py b/pymllm/backends/qualcomm/transformers/qwen3/modeling_qwen3.py index 1fe04f14d..f06019f2a 100644 --- a/pymllm/backends/qualcomm/transformers/qwen3/modeling_qwen3.py +++ b/pymllm/backends/qualcomm/transformers/qwen3/modeling_qwen3.py @@ -17,6 +17,7 @@ import torch from torch import nn +from torch.nn import functional as F from transformers.activations import ACT2FN from transformers.cache_utils import Cache, DynamicCache @@ -68,7 +69,6 @@ def __init__(self, config): self.down_proj = QLinearLPBQ( self.intermediate_size, self.hidden_size, bias=False, block_size=32 ) - self.act_fn = ACT2FN[config.hidden_act] # QDQ self.up_proj_input_qdq = ActivationQDQ(bits=16) @@ -76,12 +76,18 @@ def __init__(self, config): self.gate_proj_output_qdq = ActivationQDQ(bits=16) self.act_output_qdq = ActivationQDQ(bits=16) self.down_proj_input_qdq = ActivationQDQ(bits=16) + self.sigmoid_output_qdq = ActivationQDQ(bits=16) def forward(self, x): x = self.up_proj_input_qdq(x) up_result = self.up_proj_output_qdq(self.up_proj(x)) gate_result = self.gate_proj_output_qdq(self.gate_proj(x)) - gate_result = self.act_output_qdq(self.act_fn(gate_result)) + + # SiLU + gate_result = self.act_output_qdq( + gate_result * self.sigmoid_output_qdq(F.sigmoid(gate_result)) + ) + o = self.down_proj_input_qdq(gate_result * up_result) o = self.down_proj(o) return o diff --git a/pymllm/backends/qualcomm/transformers/qwen3/runner.py b/pymllm/backends/qualcomm/transformers/qwen3/runner.py index 0b4462f2b..082a6f0bf 100644 --- a/pymllm/backends/qualcomm/transformers/qwen3/runner.py +++ b/pymllm/backends/qualcomm/transformers/qwen3/runner.py @@ -65,7 +65,9 @@ def infer(self, prompt: str): # conduct text completion generated_ids = self.model.generate( 
**model_inputs, - max_new_tokens=128 - len(model_inputs.input_ids[0]) - 1, + max_new_tokens=self.mllm_qualcomm_max_length + - len(model_inputs.input_ids[0]) + - 1, do_sample=False, temperature=None, top_p=None, From 87746230f6a6dc099f74725bbc0b79ab3ffbb068 Mon Sep 17 00:00:00 2001 From: chenghuaWang <2923277184@qq.com> Date: Sat, 3 Jan 2026 15:21:42 +0000 Subject: [PATCH 07/13] fix: Save Qnn Qwen3 AOT Model. --- .../backends/qualcomm/transformers/qwen3/runner.py | 7 +++++++ pymllm/backends/qualcomm/transformers/qwen3/train.py | 12 ++++++++++++ 2 files changed, 19 insertions(+) diff --git a/pymllm/backends/qualcomm/transformers/qwen3/runner.py b/pymllm/backends/qualcomm/transformers/qwen3/runner.py index 082a6f0bf..37f8bae16 100644 --- a/pymllm/backends/qualcomm/transformers/qwen3/runner.py +++ b/pymllm/backends/qualcomm/transformers/qwen3/runner.py @@ -52,6 +52,13 @@ def freeze_activation(self): def enable_activation_update(self): self.model.apply(enable_qdq_observer) + def compile(self): + print("Compile Start.") + self.model = torch.compile( + self.model, mode="reduce-overhead", fullgraph=False, backend="inductor" + ) + print("Compile done.") + def infer(self, prompt: str): messages = [{"role": "user", "content": prompt}] text = self.tokenizer.apply_chat_template( diff --git a/pymllm/backends/qualcomm/transformers/qwen3/train.py b/pymllm/backends/qualcomm/transformers/qwen3/train.py index 81e452903..746970020 100644 --- a/pymllm/backends/qualcomm/transformers/qwen3/train.py +++ b/pymllm/backends/qualcomm/transformers/qwen3/train.py @@ -1,4 +1,6 @@ +import os import argparse +from safetensors.torch import save_model from pymllm.backends.qualcomm.transformers.qwen3.runner import Qwen3Quantizer @@ -25,13 +27,23 @@ def main(): default="为什么伟大不能被计划", help="Text to run inference on", ) + parser.add_argument( + "--output_dir", + type=str, + help="Directory to save the quantized model", + ) args = parser.parse_args() m = Qwen3Quantizer(args.model_path, 
mllm_qualcomm_max_length=args.max_length) m.calibrate(num_samples=args.num_samples, max_seq_length=args.max_length) + # m.compile() m.infer(args.infer_text) + os.makedirs(args.output_dir, exist_ok=True) + model_save_path = os.path.join(args.output_dir, "model.safetensors") + save_model(m.model, model_save_path) + if __name__ == "__main__": main() From 00dd1b6a66bb53da3a8c1c8fe664cf57220261b9 Mon Sep 17 00:00:00 2001 From: chenghuaWang <2923277184@qq.com> Date: Sun, 4 Jan 2026 16:04:14 +0000 Subject: [PATCH 08/13] fix: PTQ pass in qualcomm AOT workflow. --- examples/qwen3_qnn_aot/compile.cpp | 16 +- .../qwen3_qnn_aot/modeling_qwen_qnn_aot.hpp | 205 +- examples/qwen3_qnn_aot/qwen3_qnn_aot.mir | 3500 +++++++++-------- mllm/backends/cpu/CPUBackend.cpp | 11 +- mllm/backends/cpu/kernels/Kernels.hpp | 2 + mllm/backends/cpu/kernels/arm/sigmoid.cpp | 131 + mllm/backends/cpu/kernels/arm/sigmoid.hpp | 18 + mllm/backends/cpu/kernels/x86/sigmoid.cpp | 47 + mllm/backends/cpu/kernels/x86/sigmoid.hpp | 16 + mllm/backends/cpu/ops/LinearOp.cpp | 2 +- mllm/backends/cpu/ops/SigmoidOp.cpp | 41 + mllm/backends/cpu/ops/SigmoidOp.hpp | 25 + .../qnn/aot/passes/AOTCompileContext.cpp | 4 + .../qnn/aot/passes/AOTCompileContext.hpp | 6 + mllm/backends/qnn/aot/passes/AOTPipeline.cpp | 8 +- .../qnn/aot/passes/LLMQuantRecipePass.cpp | 106 +- .../qnn/aot/passes/LLMQuantRecipePass.hpp | 38 + mllm/backends/qnn/aot/passes/PTQPass.cpp | 29 + mllm/backends/qnn/aot/passes/PTQPass.hpp | 32 + mllm/compile/ir/GeneratedRTTIKind.hpp | 3 +- mllm/compile/ir/NodeRTTIClassOfImpl.hpp | 5 +- mllm/compile/ir/linalg/Attribute.hpp | 1 + mllm/compile/ir/linalg/Op.cpp | 1 + mllm/compile/ir/linalg/Op.hpp | 2 + mllm/compile/ir/rtti_kind_gen.py | 1 + mllm/core/DataTypes.cpp | 11 +- mllm/core/OpTypes.hpp | 2 + mllm/core/Tensor.cpp | 52 +- mllm/core/Tensor.hpp | 11 +- mllm/core/TensorViewImpl.hpp | 3 + mllm/core/aops/ElewiseOps.cpp | 2 +- mllm/core/aops/ParamOp.cpp | 12 +- mllm/core/aops/SigmoidOp.cpp | 37 + 
mllm/core/aops/SigmoidOp.hpp | 33 + mllm/nn/Functional.cpp | 6 + mllm/nn/Functional.hpp | 2 + mllm/nn/Module.cpp | 9 +- mllm/nn/Module.hpp | 6 +- .../qualcomm/transformers/core/qdq.py | 56 +- .../transformers/qwen3/modeling_qwen3.py | 23 +- .../qualcomm/transformers/qwen3/train.py | 4 + pymllm/quantize/pipeline.py | 6 + pymllm/utils/mllm_convertor.py | 18 + 43 files changed, 2658 insertions(+), 1885 deletions(-) create mode 100644 mllm/backends/cpu/kernels/arm/sigmoid.cpp create mode 100644 mllm/backends/cpu/kernels/arm/sigmoid.hpp create mode 100644 mllm/backends/cpu/kernels/x86/sigmoid.cpp create mode 100644 mllm/backends/cpu/kernels/x86/sigmoid.hpp create mode 100644 mllm/backends/cpu/ops/SigmoidOp.cpp create mode 100644 mllm/backends/cpu/ops/SigmoidOp.hpp create mode 100644 mllm/core/aops/SigmoidOp.cpp create mode 100644 mllm/core/aops/SigmoidOp.hpp diff --git a/examples/qwen3_qnn_aot/compile.cpp b/examples/qwen3_qnn_aot/compile.cpp index 64bf41194..26f10be05 100644 --- a/examples/qwen3_qnn_aot/compile.cpp +++ b/examples/qwen3_qnn_aot/compile.cpp @@ -41,14 +41,12 @@ MLLM_MAIN({ // Gen sin and cos { auto inv = mllm::models::qwen3::makeRoPEInvFreq(model_cfg.head_dim, model_cfg.rope_theta); - auto position_ids = mllm::Tensor::empty({1, CL}, mllm::kInt64, mllm::kCPU).alloc(); - auto position_ids_ptr = position_ids.ptr(); - for (int b = 0; b < 1; ++b) { - for (int s = 0; s < CL; ++s) { position_ids_ptr[b * CL + s] = s; } - } + auto position_ids = mllm::Tensor::empty({CL}, mllm::kInt32, mllm::kCPU).alloc(); + auto position_ids_ptr = position_ids.ptr(); + for (int s = 0; s < CL; ++s) { position_ids_ptr[s] = s; } auto [rope_sin, rope_cos] = mllm::models::qwen3::makeRotaryPosEmbedding(position_ids, inv, 1.f); - params->push("rope_sin", rope_sin.to(mllm::kInt16PerTensorSym)); - params->push("rope_cos", rope_cos.to(mllm::kInt16PerTensorSym)); + params->push("rope_sin", rope_sin.to(mllm::kUInt16PerTensorSym).setMemType(mllm::kParamsNormal).setName("rope_sin")); + 
params->push("rope_cos", rope_cos.to(mllm::kUInt16PerTensorSym).setMemType(mllm::kParamsNormal).setName("rope_cos")); } model.load(params); @@ -56,7 +54,7 @@ MLLM_MAIN({ // past_key_i: [B, H, D, CL-N] for each layer i // past_value_i: [B, H, CL-N, D] for each layer i // causal_mask: [B, 1, N, CL] - auto sequence = mllm::Tensor::zeros({1, N}, mllm::kInt64); + auto sequence = mllm::Tensor::zeros({1, N}, mllm::kInt32); auto causal_mask = mllm::Tensor::zeros({1, 1, N, CL}, mllm::kUInt16); // Create KV cache inputs for all layers @@ -75,7 +73,7 @@ MLLM_MAIN({ model_cfg.head_dim, CL - N, }, mllm::kInt8PerTensorSym); - trace_inputs[past_value_name] = mllm::Tensor::empty({1, model_cfg.num_key_value_heads, CL - N, model_cfg.head_dim}, mllm::kInt8PerTensorSym); + trace_inputs[past_value_name] = mllm::Tensor::empty({1, model_cfg.num_key_value_heads, CL - N, model_cfg.head_dim}, mllm::kUInt8PerTensorSym); // clang-format on } diff --git a/examples/qwen3_qnn_aot/modeling_qwen_qnn_aot.hpp b/examples/qwen3_qnn_aot/modeling_qwen_qnn_aot.hpp index e78e34c60..1f0da38e7 100644 --- a/examples/qwen3_qnn_aot/modeling_qwen_qnn_aot.hpp +++ b/examples/qwen3_qnn_aot/modeling_qwen_qnn_aot.hpp @@ -4,16 +4,92 @@ #pragma once #include "mllm/mllm.hpp" -#include "mllm/nn/Module.hpp" #include "mllm/nn/Nn.hpp" +#include "mllm/nn/Module.hpp" #include "mllm/nn/Functional.hpp" -#include "mllm/models/qwen3/configuration_qwen3.hpp" +#include "mllm/core/DataTypes.hpp" #include "mllm/utils/Enumerate.hpp" -#include "mllm/models/ARGeneration.hpp" #include "mllm/compile/ir/Trace.hpp" +#include "mllm/models/ARGeneration.hpp" +#include "mllm/models/qwen3/configuration_qwen3.hpp" namespace mllm::models::qwen3 { +Tensor rotateHalf(Tensor x) { // NOLINT + // X is [x, x, x, D] + auto D = x.size(-1); + auto x1 = x[{kAll, kAll, kAll, {kAll, D / 2}}]; + auto x2 = x[{kAll, kAll, kAll, {D / 2, kAll}}]; + return nn::functional::concat({-x2, x1}, -1); +} + +namespace ptq { + +Tensor QDQ(nn::Module* m, Tensor in, const 
std::string& qdq_name_in_pytorch) { + std::string scale_name = m->getModuleName() + "." + qdq_name_in_pytorch + ".fake_quant.scale"; + std::string zp_name = m->getModuleName() + "." + qdq_name_in_pytorch + ".fake_quant.zero_point"; + + if (m->getModuleName().empty()) { + scale_name = qdq_name_in_pytorch + ".fake_quant.scale"; + zp_name = qdq_name_in_pytorch + ".fake_quant.zero_point"; + } else { + scale_name = m->getModuleName() + "." + qdq_name_in_pytorch + ".fake_quant.scale"; + zp_name = m->getModuleName() + "." + qdq_name_in_pytorch + ".fake_quant.zero_point"; + } + + switch (in.dtype()) { + case kUInt16PerTensorAsy: { + auto scale = m->getTopParameterFile()->pull(scale_name); + auto zp = m->getTopParameterFile()->pull(zp_name); + in.attach("scale", scale.impl()); + in.attach("zero_point", zp.impl()); + break; + } + // For Constant! + case kFloat32: { + MLLM_RT_ASSERT_EQ(in.rank(), 1); + MLLM_RT_ASSERT_EQ(in.size(-1), 1); + auto scale = m->getTopParameterFile()->pull(scale_name); + auto zp = m->getTopParameterFile()->pull(zp_name); + in.attach("scale", scale.impl()); + in.attach("zero_point", zp.impl()); + break; + } + default: { + MLLM_ERROR_EXIT(ExitCode::kCoreError, "Can't Process dtype={}", nameOfType(in.dtype())); + } + } + + return in; +} + +Tensor QDQ_KV(nn::Module* m, Tensor in, const std::string& qdq_name_in_pytorch) { + auto scale_name = m->getModuleName() + "." + qdq_name_in_pytorch + ".fake_quant.scale"; + auto zp_name = m->getModuleName() + "." + qdq_name_in_pytorch + ".fake_quant.zero_point"; + + // The inputs is int8 sym. which means zero_point should be changed. + switch (in.dtype()) { + case kUInt8PerTensorSym: { + auto scale = m->getTopParameterFile()->pull(scale_name); + auto zp = m->getTopParameterFile()->pull(zp_name); + MLLM_RT_ASSERT_EQ(zp.item(), 0); + + // Is 128! not 127! 
+ auto new_zp = Tensor::constant(128, kInt32).setName(zp_name).setMemType(kParamsNormal); + in.attach("scale", scale.impl()); + in.attach("zero_point", new_zp.impl()); + break; + } + default: { + MLLM_ERROR_EXIT(ExitCode::kCoreError, "Can't Process dtype={}", nameOfType(in.dtype())); + } + } + + return in; +} + +} // namespace ptq + inline auto makeRoPEInvFreq(int output_dim, float rope_theta) -> Tensor { auto inv_freq = Tensor::empty({output_dim / 2}, kFloat32, kCPU).alloc(); auto inv_freq_ptr = inv_freq.ptr(); @@ -86,12 +162,19 @@ class Qwen3MLP final : public nn::Module { } std::vector forward(const std::vector& inputs, const std::vector& args) override { - auto x = gate_proj_(inputs[0]); - x = silu_(x); - auto y = up_proj_(inputs[0]); - x = x * y; - x = down_proj_(x); - return {x}; + auto x = inputs[0]; + x = ptq::QDQ(this, x, "up_proj_input_qdq"); + auto up_result = ptq::QDQ(this, up_proj_(x), "up_proj_output_qdq"); + auto gate_result = ptq::QDQ(this, gate_proj_(x), "gate_proj_output_qdq"); + + // SiLU + gate_result = ptq::QDQ(this, (gate_result * ptq::QDQ(this, nn::functional::sigmoid(gate_result), "sigmoid_output_qdq")), + "act_output_qdq"); + + auto o = ptq::QDQ(this, gate_result * up_result, "down_proj_input_qdq"); + o = down_proj_(o); + + return {o}; } }; @@ -102,8 +185,6 @@ class Qwen3Attention final : public nn::Module { nn::Linear o_proj_; nn::RMSNorm rms_norm_q_; nn::RMSNorm rms_norm_k_; - nn::RoPE q_rope_; - nn::RoPE k_rope_; nn::CausalMask mask_; nn::Softmax softmax_; @@ -135,25 +216,24 @@ class Qwen3Attention final : public nn::Module { rms_norm_q_ = reg("q_norm", cfg.rms_norm_eps); rms_norm_k_ = reg("k_norm", cfg.rms_norm_eps); - q_rope_ = reg("q_rope", cfg.rope_theta, cfg.max_position_embeddings); - k_rope_ = reg("k_rope", cfg.rope_theta, cfg.max_position_embeddings); - mask_ = reg("mask"); softmax_ = reg("softmax", -1); } std::vector forward(const std::vector& inputs, const std::vector& args) override { - auto x = inputs[0]; + auto hidden_states 
= inputs[0]; auto llm_embedding_sin = inputs[1]; auto llm_embedding_cos = inputs[2]; auto causal_mask = inputs[3]; auto past_key = inputs[4]; auto past_value = inputs[5]; + hidden_states = ptq::QDQ(this, hidden_states, "q_proj_input_qdq"); + // [B, S, H * D] - auto query_states = q_proj_(x); - auto key_states = k_proj_(x); - auto value_states = v_proj_(x); + auto query_states = q_proj_(hidden_states); + auto key_states = k_proj_(hidden_states); + auto value_states = v_proj_(hidden_states); // [B, H, S, D] query_states = query_states.view({1, -1, num_attention_heads_, head_dim_}).transpose(1, 2); @@ -161,23 +241,38 @@ class Qwen3Attention final : public nn::Module { value_states = value_states.view({1, -1, num_key_value_heads_, head_dim_}).transpose(1, 2); // [B, H, S, D] - query_states = rms_norm_q_(query_states); - key_states = rms_norm_k_(key_states); + query_states = rms_norm_q_(ptq::QDQ(this, query_states, "q_norm_input_qdq")); + key_states = rms_norm_k_(ptq::QDQ(this, key_states, "k_norm_input_qdq")); + + query_states = ptq::QDQ(this, query_states, "q_norm_output_qdq"); + key_states = ptq::QDQ(this, key_states, "k_norm_output_qdq"); // [B, H, S, D] - query_states = q_rope_(query_states, llm_embedding_sin, llm_embedding_cos); - key_states = k_rope_(key_states, llm_embedding_sin, llm_embedding_cos); + auto cos = llm_embedding_cos.unsqueeze(1); + auto sin = llm_embedding_sin.unsqueeze(1); + query_states = ptq::QDQ(this, + ptq::QDQ(this, query_states * cos, "q_rope_mul_0_output_qdq") + + ptq::QDQ(this, rotateHalf(query_states) * sin, "q_rope_mul_1_output_qdq"), + "q_rope_add_0_output_qdq"); + key_states = ptq::QDQ(this, + ptq::QDQ(this, key_states * cos, "k_rope_mul_0_output_qdq") + + ptq::QDQ(this, rotateHalf(key_states) * sin, "k_rope_mul_1_output_qdq"), + "k_rope_add_0_output_qdq"); // De-quantization and quantization again key_states = key_states.to(kFloat16); - key_states = key_states.to(kInt8PerTensorSym); + key_states = key_states.to(kUInt8PerTensorSym); + 
key_states = ptq::QDQ_KV(this, key_states, "k_cast_to_int8_qdq"); // [B, H, D, S] key_states = key_states.transpose(2, 3); // Handle KV Cache + value_states = ptq::QDQ(this, value_states, "v_cast_to_int16_qdq"); value_states = value_states.to(kFloat16); - value_states = value_states.to(kInt8PerTensorSym); + value_states = value_states.to(kUInt8PerTensorSym); + value_states = ptq::QDQ_KV(this, value_states, "v_cast_to_int8_qdq"); + auto kh = nn::functional::concat({past_key, key_states}, -1); // [B, H, D, S] auto vh = nn::functional::concat({past_value, value_states}, 2); // [B, H, S, D] @@ -186,15 +281,18 @@ class Qwen3Attention final : public nn::Module { vh = vh.repeat(num_key_value_groups_, 1); // Attn - auto attn = nn::functional::matmul(query_states, kh); - attn = attn.mul(scale_, kFloat32); + auto attn = ptq::QDQ(this, nn::functional::matmul(query_states, kh), "qk_matmul_output_qdq"); + auto scale = Tensor::constant(scale_, kFloat32); + scale = ptq::QDQ(this, scale, "scaling_qdq"); + attn = ptq::QDQ(this, attn.mulConstant(scale), "mul_0_output_qdq"); // Masked Softmax - auto attn_min = attn.min(-1, true); - float minus_value = -20; - attn = nn::functional::where(causal_mask.equal(0.f), attn, attn_min.add(minus_value, kInt16)); - attn = nn::functional::softmax(attn, -1); - auto y = nn::functional::matmul(attn, vh); + auto attn_min = ptq::QDQ(this, attn.min(-1, true), "reduce_min_output_qdq"); + auto minus_value = Tensor::constant(-20, kFloat32); + minus_value = ptq::QDQ(this, minus_value, "neg_20_qdq"); + attn = nn::functional::where(causal_mask.equal(0.f), attn, attn_min.addConstant(minus_value)); + attn = ptq::QDQ(this, nn::functional::softmax(attn, -1), "softmax_output_qdq"); + auto y = ptq::QDQ(this, nn::functional::matmul(attn, vh), "attn_value_matmul_output_qdq"); y = y.transpose(1, 2).view({1, -1, num_attention_heads_ * head_dim_}); y = o_proj_(y); @@ -227,14 +325,18 @@ class Qwen3Decoder final : public nn::Module { auto past_key = inputs[4]; auto 
past_value = inputs[5]; - auto x = input_layer_norm_(inputs[0]); - auto _ = self_attn_(x, llm_embedding_sin, llm_embedding_cos, causal_mask, past_key, past_value); - x = _[0]; - auto tmp = x + inputs[0]; - x = post_attention_layer_norm_(tmp); - x = mlp_(x)[0]; - x = x + tmp; - return {x, _[1], _[2]}; + auto hidden_states = inputs[0]; + hidden_states = ptq::QDQ(this, hidden_states, "input_layernorm_input_qdq"); + auto residual = hidden_states; + hidden_states = input_layer_norm_(hidden_states); + auto _ = self_attn_(hidden_states, llm_embedding_sin, llm_embedding_cos, causal_mask, past_key, past_value); + hidden_states = _[0]; + hidden_states = ptq::QDQ(this, residual + ptq::QDQ(this, hidden_states, "add_0_lhs_input_qdq"), "add_0_output_qdq"); + residual = hidden_states; + hidden_states = post_attention_layer_norm_(hidden_states); + hidden_states = mlp_(hidden_states)[0]; + hidden_states = residual + ptq::QDQ(this, hidden_states, "add_1_lhs_input_qdq"); + return {hidden_states, _[1], _[2]}; } }; @@ -266,13 +368,13 @@ class Qwen3Text final : public nn::Module { auto x = embedding_(inputs[0]); // Quantization - x = x.to(kInt16PerTensorSym); + x = x.to(kUInt16PerTensorAsy); auto position_ids = inputs[1]; auto causal_mask = inputs[2]; position_ids = position_ids.squeeze(0); - auto llm_embedding_sin = rope_sin_.weight()[{{0}, position_ids, {kAll}}]; - auto llm_embedding_cos = rope_cos_.weight()[{{0}, position_ids, {kAll}}]; + auto llm_embedding_sin = rope_sin_()[{{0}, position_ids, {kAll}}]; + auto llm_embedding_cos = rope_cos_()[{{0}, position_ids, {kAll}}]; std::vector keys; std::vector values; @@ -285,7 +387,7 @@ class Qwen3Text final : public nn::Module { values.push_back(_[2]); } - x = norm_(x); + x = norm_(ptq::QDQ(this, x, "norm_input_qdq")); auto ret = std::vector{x}; for (const auto& item : keys) { ret.push_back(item); } @@ -357,17 +459,15 @@ class Qwen3ForCausalLM : public ARGeneration, public nn::Module { // For decode phase, increment the last position if 
(seq_len == 1) { - auto last_pos = *position_ids.offsettedPtr({0, position_ids.shape()[1] - 1}); - position_ids = Tensor::empty({batch_size, 1}, kInt64, kCPU).alloc(); - *position_ids.offsettedPtr({0, 0}) = last_pos + 1; + auto last_pos = *position_ids.offsettedPtr({0, position_ids.shape()[1] - 1}); + position_ids = Tensor::empty({batch_size, 1}, kInt32, kCPU).alloc(); + *position_ids.offsettedPtr({0, 0}) = last_pos + 1; } } else { // Generate position_ids for prefill phase - position_ids = Tensor::empty({batch_size, seq_len}, kInt64, kCPU).alloc(); - auto position_ids_ptr = position_ids.ptr(); - for (int b = 0; b < batch_size; ++b) { - for (int s = 0; s < seq_len; ++s) { position_ids_ptr[b * seq_len + s] = s; } - } + position_ids = Tensor::empty({batch_size, seq_len}, kInt32, kCPU).alloc(); + auto position_ids_ptr = position_ids.ptr(); + for (int s = 0; s < seq_len; ++s) { position_ids_ptr[s] = s; } } ir::lowlevel::traceStart(); @@ -377,7 +477,8 @@ class Qwen3ForCausalLM : public ARGeneration, public nn::Module { llm_inputs.insert(llm_inputs.end(), kv_caches.begin(), kv_caches.end()); sequence = llm(llm_inputs)[0]; - sequence = lm_head_(sequence); + sequence = lm_head_(ptq::QDQ(this, sequence, "lm_head_input_qdq")); + ptq::QDQ(this, sequence, "lm_head_output_qdq"); ir::lowlevel::traceComment(" ╔═════╗ "); ir::lowlevel::traceComment(" ║ o o ║ "); ir::lowlevel::traceComment(" ║ ▽ ║ "); diff --git a/examples/qwen3_qnn_aot/qwen3_qnn_aot.mir b/examples/qwen3_qnn_aot/qwen3_qnn_aot.mir index 6ca20f7af..1caff3b4a 100644 --- a/examples/qwen3_qnn_aot/qwen3_qnn_aot.mir +++ b/examples/qwen3_qnn_aot/qwen3_qnn_aot.mir @@ -1,317 +1,319 @@ @main () -> () { graph.SubGraphOp @init [symbol:init] { () -> () { - tensor.CPU.register () -> (%105:tensor<[151936, 2048], Float32, CPU>[@model.embed_tokens.weight][symbol:model.embed_tokens.weight])[symbol:model.embed_tokens.weight] - tensor.CPU.register () -> (%199:tensor<[2048], Float32, 
CPU>[@model.layers.0.input_layernorm.weight][qnn_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=66), symbol:model.layers.0.input_layernorm.weight])[symbol:model.layers.0.input_layernorm.weight] - tensor.CPU.register () -> (%76:tensor<[2048, 2048], Float32, CPU>[@model.layers.0.self_attn.q_proj.weight][symbol:model.layers.0.self_attn.q_proj.weight])[symbol:model.layers.0.self_attn.q_proj.weight] - tensor.CPU.register () -> (%133:tensor<[1024, 2048], Float32, CPU>[@model.layers.0.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=67), symbol:model.layers.0.self_attn.k_proj.weight])[symbol:model.layers.0.self_attn.k_proj.weight] - tensor.CPU.register () -> (%179:tensor<[1024, 2048], Float32, CPU>[@model.layers.0.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=69), symbol:model.layers.0.self_attn.v_proj.weight])[symbol:model.layers.0.self_attn.v_proj.weight] - tensor.CPU.register () -> (%200:tensor<[128], Float32, CPU>[@model.layers.0.self_attn.q_norm.weight][qnn_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=73), symbol:model.layers.0.self_attn.q_norm.weight])[symbol:model.layers.0.self_attn.q_norm.weight] - tensor.CPU.register () -> (%291:tensor<[128], Float32, CPU>[@model.layers.0.self_attn.k_norm.weight][qnn_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=75), symbol:model.layers.0.self_attn.k_norm.weight])[symbol:model.layers.0.self_attn.k_norm.weight] - tensor.CPU.register () -> (%269:tensor<[2048, 2048], Float32, CPU>[@model.layers.0.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, 
scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=88), symbol:model.layers.0.self_attn.o_proj.weight])[symbol:model.layers.0.self_attn.o_proj.weight] - tensor.CPU.register () -> (%40:tensor<[2048], Float32, CPU>[@model.layers.0.post_attention_layernorm.weight][qnn_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=91), symbol:model.layers.0.post_attention_layernorm.weight])[symbol:model.layers.0.post_attention_layernorm.weight] - tensor.CPU.register () -> (%9:tensor<[6144, 2048], Float32, CPU>[@model.layers.0.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=92), symbol:model.layers.0.mlp.gate_proj.weight])[symbol:model.layers.0.mlp.gate_proj.weight] - tensor.CPU.register () -> (%111:tensor<[6144, 2048], Float32, CPU>[@model.layers.0.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=95), symbol:model.layers.0.mlp.up_proj.weight])[symbol:model.layers.0.mlp.up_proj.weight] - tensor.CPU.register () -> (%184:tensor<[2048, 6144], Float32, CPU>[@model.layers.0.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=97), symbol:model.layers.0.mlp.down_proj.weight])[symbol:model.layers.0.mlp.down_proj.weight] - tensor.CPU.register () -> (%180:tensor<[2048], Float32, CPU>[@model.layers.1.input_layernorm.weight][qnn_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=100), symbol:model.layers.1.input_layernorm.weight])[symbol:model.layers.1.input_layernorm.weight] - tensor.CPU.register () -> (%285:tensor<[2048, 2048], Float32, 
CPU>[@model.layers.1.self_attn.q_proj.weight][symbol:model.layers.1.self_attn.q_proj.weight])[symbol:model.layers.1.self_attn.q_proj.weight] - tensor.CPU.register () -> (%32:tensor<[1024, 2048], Float32, CPU>[@model.layers.1.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=101), symbol:model.layers.1.self_attn.k_proj.weight])[symbol:model.layers.1.self_attn.k_proj.weight] - tensor.CPU.register () -> (%154:tensor<[1024, 2048], Float32, CPU>[@model.layers.1.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=103), symbol:model.layers.1.self_attn.v_proj.weight])[symbol:model.layers.1.self_attn.v_proj.weight] - tensor.CPU.register () -> (%131:tensor<[128], Float32, CPU>[@model.layers.1.self_attn.q_norm.weight][qnn_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=107), symbol:model.layers.1.self_attn.q_norm.weight])[symbol:model.layers.1.self_attn.q_norm.weight] - tensor.CPU.register () -> (%68:tensor<[128], Float32, CPU>[@model.layers.1.self_attn.k_norm.weight][qnn_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=109), symbol:model.layers.1.self_attn.k_norm.weight])[symbol:model.layers.1.self_attn.k_norm.weight] - tensor.CPU.register () -> (%20:tensor<[2048, 2048], Float32, CPU>[@model.layers.1.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=122), symbol:model.layers.1.self_attn.o_proj.weight])[symbol:model.layers.1.self_attn.o_proj.weight] - tensor.CPU.register () -> (%73:tensor<[2048], Float32, CPU>[@model.layers.1.post_attention_layernorm.weight][qnn_recipe:QuantSpec(SymPerTensor(quant_min: 
-32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=125), symbol:model.layers.1.post_attention_layernorm.weight])[symbol:model.layers.1.post_attention_layernorm.weight] - tensor.CPU.register () -> (%245:tensor<[6144, 2048], Float32, CPU>[@model.layers.1.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=126), symbol:model.layers.1.mlp.gate_proj.weight])[symbol:model.layers.1.mlp.gate_proj.weight] - tensor.CPU.register () -> (%230:tensor<[6144, 2048], Float32, CPU>[@model.layers.1.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=129), symbol:model.layers.1.mlp.up_proj.weight])[symbol:model.layers.1.mlp.up_proj.weight] - tensor.CPU.register () -> (%43:tensor<[2048, 6144], Float32, CPU>[@model.layers.1.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=131), symbol:model.layers.1.mlp.down_proj.weight])[symbol:model.layers.1.mlp.down_proj.weight] - tensor.CPU.register () -> (%86:tensor<[2048], Float32, CPU>[@model.layers.2.input_layernorm.weight][qnn_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=134), symbol:model.layers.2.input_layernorm.weight])[symbol:model.layers.2.input_layernorm.weight] - tensor.CPU.register () -> (%221:tensor<[2048, 2048], Float32, CPU>[@model.layers.2.self_attn.q_proj.weight][symbol:model.layers.2.self_attn.q_proj.weight])[symbol:model.layers.2.self_attn.q_proj.weight] - tensor.CPU.register () -> (%103:tensor<[1024, 2048], Float32, CPU>[@model.layers.2.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, 
ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=135), symbol:model.layers.2.self_attn.k_proj.weight])[symbol:model.layers.2.self_attn.k_proj.weight] - tensor.CPU.register () -> (%47:tensor<[1024, 2048], Float32, CPU>[@model.layers.2.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=137), symbol:model.layers.2.self_attn.v_proj.weight])[symbol:model.layers.2.self_attn.v_proj.weight] - tensor.CPU.register () -> (%65:tensor<[128], Float32, CPU>[@model.layers.2.self_attn.q_norm.weight][qnn_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=141), symbol:model.layers.2.self_attn.q_norm.weight])[symbol:model.layers.2.self_attn.q_norm.weight] - tensor.CPU.register () -> (%16:tensor<[128], Float32, CPU>[@model.layers.2.self_attn.k_norm.weight][qnn_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=143), symbol:model.layers.2.self_attn.k_norm.weight])[symbol:model.layers.2.self_attn.k_norm.weight] - tensor.CPU.register () -> (%85:tensor<[2048, 2048], Float32, CPU>[@model.layers.2.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=156), symbol:model.layers.2.self_attn.o_proj.weight])[symbol:model.layers.2.self_attn.o_proj.weight] - tensor.CPU.register () -> (%128:tensor<[2048], Float32, CPU>[@model.layers.2.post_attention_layernorm.weight][qnn_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=159), symbol:model.layers.2.post_attention_layernorm.weight])[symbol:model.layers.2.post_attention_layernorm.weight] - tensor.CPU.register () -> (%252:tensor<[6144, 2048], Float32, 
CPU>[@model.layers.2.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=160), symbol:model.layers.2.mlp.gate_proj.weight])[symbol:model.layers.2.mlp.gate_proj.weight] - tensor.CPU.register () -> (%24:tensor<[6144, 2048], Float32, CPU>[@model.layers.2.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=163), symbol:model.layers.2.mlp.up_proj.weight])[symbol:model.layers.2.mlp.up_proj.weight] - tensor.CPU.register () -> (%28:tensor<[2048, 6144], Float32, CPU>[@model.layers.2.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=165), symbol:model.layers.2.mlp.down_proj.weight])[symbol:model.layers.2.mlp.down_proj.weight] - tensor.CPU.register () -> (%1:tensor<[2048], Float32, CPU>[@model.layers.3.input_layernorm.weight][qnn_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=168), symbol:model.layers.3.input_layernorm.weight])[symbol:model.layers.3.input_layernorm.weight] - tensor.CPU.register () -> (%283:tensor<[2048, 2048], Float32, CPU>[@model.layers.3.self_attn.q_proj.weight][symbol:model.layers.3.self_attn.q_proj.weight])[symbol:model.layers.3.self_attn.q_proj.weight] - tensor.CPU.register () -> (%48:tensor<[1024, 2048], Float32, CPU>[@model.layers.3.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=169), symbol:model.layers.3.self_attn.k_proj.weight])[symbol:model.layers.3.self_attn.k_proj.weight] - tensor.CPU.register () -> (%244:tensor<[1024, 2048], Float32, 
CPU>[@model.layers.3.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=171), symbol:model.layers.3.self_attn.v_proj.weight])[symbol:model.layers.3.self_attn.v_proj.weight] - tensor.CPU.register () -> (%33:tensor<[128], Float32, CPU>[@model.layers.3.self_attn.q_norm.weight][qnn_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=175), symbol:model.layers.3.self_attn.q_norm.weight])[symbol:model.layers.3.self_attn.q_norm.weight] - tensor.CPU.register () -> (%202:tensor<[128], Float32, CPU>[@model.layers.3.self_attn.k_norm.weight][qnn_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=177), symbol:model.layers.3.self_attn.k_norm.weight])[symbol:model.layers.3.self_attn.k_norm.weight] - tensor.CPU.register () -> (%301:tensor<[2048, 2048], Float32, CPU>[@model.layers.3.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=190), symbol:model.layers.3.self_attn.o_proj.weight])[symbol:model.layers.3.self_attn.o_proj.weight] - tensor.CPU.register () -> (%223:tensor<[2048], Float32, CPU>[@model.layers.3.post_attention_layernorm.weight][qnn_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=193), symbol:model.layers.3.post_attention_layernorm.weight])[symbol:model.layers.3.post_attention_layernorm.weight] - tensor.CPU.register () -> (%129:tensor<[6144, 2048], Float32, CPU>[@model.layers.3.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=194), symbol:model.layers.3.mlp.gate_proj.weight])[symbol:model.layers.3.mlp.gate_proj.weight] - 
tensor.CPU.register () -> (%188:tensor<[6144, 2048], Float32, CPU>[@model.layers.3.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=197), symbol:model.layers.3.mlp.up_proj.weight])[symbol:model.layers.3.mlp.up_proj.weight] - tensor.CPU.register () -> (%97:tensor<[2048, 6144], Float32, CPU>[@model.layers.3.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=199), symbol:model.layers.3.mlp.down_proj.weight])[symbol:model.layers.3.mlp.down_proj.weight] - tensor.CPU.register () -> (%3:tensor<[2048], Float32, CPU>[@model.layers.4.input_layernorm.weight][qnn_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=202), symbol:model.layers.4.input_layernorm.weight])[symbol:model.layers.4.input_layernorm.weight] - tensor.CPU.register () -> (%164:tensor<[2048, 2048], Float32, CPU>[@model.layers.4.self_attn.q_proj.weight][symbol:model.layers.4.self_attn.q_proj.weight])[symbol:model.layers.4.self_attn.q_proj.weight] - tensor.CPU.register () -> (%148:tensor<[1024, 2048], Float32, CPU>[@model.layers.4.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=203), symbol:model.layers.4.self_attn.k_proj.weight])[symbol:model.layers.4.self_attn.k_proj.weight] - tensor.CPU.register () -> (%279:tensor<[1024, 2048], Float32, CPU>[@model.layers.4.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=205), symbol:model.layers.4.self_attn.v_proj.weight])[symbol:model.layers.4.self_attn.v_proj.weight] - 
tensor.CPU.register () -> (%145:tensor<[128], Float32, CPU>[@model.layers.4.self_attn.q_norm.weight][qnn_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=209), symbol:model.layers.4.self_attn.q_norm.weight])[symbol:model.layers.4.self_attn.q_norm.weight] - tensor.CPU.register () -> (%282:tensor<[128], Float32, CPU>[@model.layers.4.self_attn.k_norm.weight][qnn_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=211), symbol:model.layers.4.self_attn.k_norm.weight])[symbol:model.layers.4.self_attn.k_norm.weight] - tensor.CPU.register () -> (%91:tensor<[2048, 2048], Float32, CPU>[@model.layers.4.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=224), symbol:model.layers.4.self_attn.o_proj.weight])[symbol:model.layers.4.self_attn.o_proj.weight] - tensor.CPU.register () -> (%258:tensor<[2048], Float32, CPU>[@model.layers.4.post_attention_layernorm.weight][qnn_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=227), symbol:model.layers.4.post_attention_layernorm.weight])[symbol:model.layers.4.post_attention_layernorm.weight] - tensor.CPU.register () -> (%189:tensor<[6144, 2048], Float32, CPU>[@model.layers.4.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=228), symbol:model.layers.4.mlp.gate_proj.weight])[symbol:model.layers.4.mlp.gate_proj.weight] - tensor.CPU.register () -> (%156:tensor<[6144, 2048], Float32, CPU>[@model.layers.4.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=231), 
symbol:model.layers.4.mlp.up_proj.weight])[symbol:model.layers.4.mlp.up_proj.weight] - tensor.CPU.register () -> (%153:tensor<[2048, 6144], Float32, CPU>[@model.layers.4.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=233), symbol:model.layers.4.mlp.down_proj.weight])[symbol:model.layers.4.mlp.down_proj.weight] - tensor.CPU.register () -> (%256:tensor<[2048], Float32, CPU>[@model.layers.5.input_layernorm.weight][qnn_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=236), symbol:model.layers.5.input_layernorm.weight])[symbol:model.layers.5.input_layernorm.weight] - tensor.CPU.register () -> (%78:tensor<[2048, 2048], Float32, CPU>[@model.layers.5.self_attn.q_proj.weight][symbol:model.layers.5.self_attn.q_proj.weight])[symbol:model.layers.5.self_attn.q_proj.weight] - tensor.CPU.register () -> (%72:tensor<[1024, 2048], Float32, CPU>[@model.layers.5.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=237), symbol:model.layers.5.self_attn.k_proj.weight])[symbol:model.layers.5.self_attn.k_proj.weight] - tensor.CPU.register () -> (%289:tensor<[1024, 2048], Float32, CPU>[@model.layers.5.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=239), symbol:model.layers.5.self_attn.v_proj.weight])[symbol:model.layers.5.self_attn.v_proj.weight] - tensor.CPU.register () -> (%225:tensor<[128], Float32, CPU>[@model.layers.5.self_attn.q_norm.weight][qnn_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=243), symbol:model.layers.5.self_attn.q_norm.weight])[symbol:model.layers.5.self_attn.q_norm.weight] - 
tensor.CPU.register () -> (%7:tensor<[128], Float32, CPU>[@model.layers.5.self_attn.k_norm.weight][qnn_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=245), symbol:model.layers.5.self_attn.k_norm.weight])[symbol:model.layers.5.self_attn.k_norm.weight] - tensor.CPU.register () -> (%264:tensor<[2048, 2048], Float32, CPU>[@model.layers.5.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=258), symbol:model.layers.5.self_attn.o_proj.weight])[symbol:model.layers.5.self_attn.o_proj.weight] - tensor.CPU.register () -> (%99:tensor<[2048], Float32, CPU>[@model.layers.5.post_attention_layernorm.weight][qnn_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=261), symbol:model.layers.5.post_attention_layernorm.weight])[symbol:model.layers.5.post_attention_layernorm.weight] - tensor.CPU.register () -> (%4:tensor<[6144, 2048], Float32, CPU>[@model.layers.5.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=262), symbol:model.layers.5.mlp.gate_proj.weight])[symbol:model.layers.5.mlp.gate_proj.weight] - tensor.CPU.register () -> (%308:tensor<[6144, 2048], Float32, CPU>[@model.layers.5.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=265), symbol:model.layers.5.mlp.up_proj.weight])[symbol:model.layers.5.mlp.up_proj.weight] - tensor.CPU.register () -> (%74:tensor<[2048, 6144], Float32, CPU>[@model.layers.5.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, 
quant_to_type: UInt4, scale_1_type: Float32), uuid=267), symbol:model.layers.5.mlp.down_proj.weight])[symbol:model.layers.5.mlp.down_proj.weight] - tensor.CPU.register () -> (%132:tensor<[2048], Float32, CPU>[@model.layers.6.input_layernorm.weight][qnn_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=270), symbol:model.layers.6.input_layernorm.weight])[symbol:model.layers.6.input_layernorm.weight] - tensor.CPU.register () -> (%59:tensor<[2048, 2048], Float32, CPU>[@model.layers.6.self_attn.q_proj.weight][symbol:model.layers.6.self_attn.q_proj.weight])[symbol:model.layers.6.self_attn.q_proj.weight] - tensor.CPU.register () -> (%208:tensor<[1024, 2048], Float32, CPU>[@model.layers.6.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=271), symbol:model.layers.6.self_attn.k_proj.weight])[symbol:model.layers.6.self_attn.k_proj.weight] - tensor.CPU.register () -> (%238:tensor<[1024, 2048], Float32, CPU>[@model.layers.6.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=273), symbol:model.layers.6.self_attn.v_proj.weight])[symbol:model.layers.6.self_attn.v_proj.weight] - tensor.CPU.register () -> (%294:tensor<[128], Float32, CPU>[@model.layers.6.self_attn.q_norm.weight][qnn_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=277), symbol:model.layers.6.self_attn.q_norm.weight])[symbol:model.layers.6.self_attn.q_norm.weight] - tensor.CPU.register () -> (%71:tensor<[128], Float32, CPU>[@model.layers.6.self_attn.k_norm.weight][qnn_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=279), 
symbol:model.layers.6.self_attn.k_norm.weight])[symbol:model.layers.6.self_attn.k_norm.weight] - tensor.CPU.register () -> (%52:tensor<[2048, 2048], Float32, CPU>[@model.layers.6.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=292), symbol:model.layers.6.self_attn.o_proj.weight])[symbol:model.layers.6.self_attn.o_proj.weight] - tensor.CPU.register () -> (%108:tensor<[2048], Float32, CPU>[@model.layers.6.post_attention_layernorm.weight][qnn_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=295), symbol:model.layers.6.post_attention_layernorm.weight])[symbol:model.layers.6.post_attention_layernorm.weight] - tensor.CPU.register () -> (%80:tensor<[6144, 2048], Float32, CPU>[@model.layers.6.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=296), symbol:model.layers.6.mlp.gate_proj.weight])[symbol:model.layers.6.mlp.gate_proj.weight] - tensor.CPU.register () -> (%276:tensor<[6144, 2048], Float32, CPU>[@model.layers.6.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=299), symbol:model.layers.6.mlp.up_proj.weight])[symbol:model.layers.6.mlp.up_proj.weight] - tensor.CPU.register () -> (%227:tensor<[2048, 6144], Float32, CPU>[@model.layers.6.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=301), symbol:model.layers.6.mlp.down_proj.weight])[symbol:model.layers.6.mlp.down_proj.weight] - tensor.CPU.register () -> (%107:tensor<[2048], Float32, 
CPU>[@model.layers.7.input_layernorm.weight][qnn_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=304), symbol:model.layers.7.input_layernorm.weight])[symbol:model.layers.7.input_layernorm.weight] - tensor.CPU.register () -> (%287:tensor<[2048, 2048], Float32, CPU>[@model.layers.7.self_attn.q_proj.weight][symbol:model.layers.7.self_attn.q_proj.weight])[symbol:model.layers.7.self_attn.q_proj.weight] - tensor.CPU.register () -> (%135:tensor<[1024, 2048], Float32, CPU>[@model.layers.7.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=305), symbol:model.layers.7.self_attn.k_proj.weight])[symbol:model.layers.7.self_attn.k_proj.weight] - tensor.CPU.register () -> (%300:tensor<[1024, 2048], Float32, CPU>[@model.layers.7.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=307), symbol:model.layers.7.self_attn.v_proj.weight])[symbol:model.layers.7.self_attn.v_proj.weight] - tensor.CPU.register () -> (%23:tensor<[128], Float32, CPU>[@model.layers.7.self_attn.q_norm.weight][qnn_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=311), symbol:model.layers.7.self_attn.q_norm.weight])[symbol:model.layers.7.self_attn.q_norm.weight] - tensor.CPU.register () -> (%137:tensor<[128], Float32, CPU>[@model.layers.7.self_attn.k_norm.weight][qnn_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=313), symbol:model.layers.7.self_attn.k_norm.weight])[symbol:model.layers.7.self_attn.k_norm.weight] - tensor.CPU.register () -> (%251:tensor<[2048, 2048], Float32, CPU>[@model.layers.7.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, 
scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=326), symbol:model.layers.7.self_attn.o_proj.weight])[symbol:model.layers.7.self_attn.o_proj.weight] - tensor.CPU.register () -> (%53:tensor<[2048], Float32, CPU>[@model.layers.7.post_attention_layernorm.weight][qnn_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=329), symbol:model.layers.7.post_attention_layernorm.weight])[symbol:model.layers.7.post_attention_layernorm.weight] - tensor.CPU.register () -> (%155:tensor<[6144, 2048], Float32, CPU>[@model.layers.7.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=330), symbol:model.layers.7.mlp.gate_proj.weight])[symbol:model.layers.7.mlp.gate_proj.weight] - tensor.CPU.register () -> (%218:tensor<[6144, 2048], Float32, CPU>[@model.layers.7.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=333), symbol:model.layers.7.mlp.up_proj.weight])[symbol:model.layers.7.mlp.up_proj.weight] - tensor.CPU.register () -> (%275:tensor<[2048, 6144], Float32, CPU>[@model.layers.7.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=335), symbol:model.layers.7.mlp.down_proj.weight])[symbol:model.layers.7.mlp.down_proj.weight] - tensor.CPU.register () -> (%171:tensor<[2048], Float32, CPU>[@model.layers.8.input_layernorm.weight][qnn_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=338), symbol:model.layers.8.input_layernorm.weight])[symbol:model.layers.8.input_layernorm.weight] - tensor.CPU.register () -> (%165:tensor<[2048, 2048], 
Float32, CPU>[@model.layers.8.self_attn.q_proj.weight][symbol:model.layers.8.self_attn.q_proj.weight])[symbol:model.layers.8.self_attn.q_proj.weight] - tensor.CPU.register () -> (%194:tensor<[1024, 2048], Float32, CPU>[@model.layers.8.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=339), symbol:model.layers.8.self_attn.k_proj.weight])[symbol:model.layers.8.self_attn.k_proj.weight] - tensor.CPU.register () -> (%181:tensor<[1024, 2048], Float32, CPU>[@model.layers.8.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=341), symbol:model.layers.8.self_attn.v_proj.weight])[symbol:model.layers.8.self_attn.v_proj.weight] - tensor.CPU.register () -> (%309:tensor<[128], Float32, CPU>[@model.layers.8.self_attn.q_norm.weight][qnn_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=345), symbol:model.layers.8.self_attn.q_norm.weight])[symbol:model.layers.8.self_attn.q_norm.weight] - tensor.CPU.register () -> (%92:tensor<[128], Float32, CPU>[@model.layers.8.self_attn.k_norm.weight][qnn_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=347), symbol:model.layers.8.self_attn.k_norm.weight])[symbol:model.layers.8.self_attn.k_norm.weight] - tensor.CPU.register () -> (%197:tensor<[2048, 2048], Float32, CPU>[@model.layers.8.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=360), symbol:model.layers.8.self_attn.o_proj.weight])[symbol:model.layers.8.self_attn.o_proj.weight] - tensor.CPU.register () -> (%122:tensor<[2048], Float32, 
CPU>[@model.layers.8.post_attention_layernorm.weight][qnn_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=363), symbol:model.layers.8.post_attention_layernorm.weight])[symbol:model.layers.8.post_attention_layernorm.weight] - tensor.CPU.register () -> (%110:tensor<[6144, 2048], Float32, CPU>[@model.layers.8.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=364), symbol:model.layers.8.mlp.gate_proj.weight])[symbol:model.layers.8.mlp.gate_proj.weight] - tensor.CPU.register () -> (%236:tensor<[6144, 2048], Float32, CPU>[@model.layers.8.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=367), symbol:model.layers.8.mlp.up_proj.weight])[symbol:model.layers.8.mlp.up_proj.weight] - tensor.CPU.register () -> (%106:tensor<[2048, 6144], Float32, CPU>[@model.layers.8.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=369), symbol:model.layers.8.mlp.down_proj.weight])[symbol:model.layers.8.mlp.down_proj.weight] - tensor.CPU.register () -> (%178:tensor<[2048], Float32, CPU>[@model.layers.9.input_layernorm.weight][qnn_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=372), symbol:model.layers.9.input_layernorm.weight])[symbol:model.layers.9.input_layernorm.weight] - tensor.CPU.register () -> (%235:tensor<[2048, 2048], Float32, CPU>[@model.layers.9.self_attn.q_proj.weight][symbol:model.layers.9.self_attn.q_proj.weight])[symbol:model.layers.9.self_attn.q_proj.weight] - tensor.CPU.register () -> (%69:tensor<[1024, 2048], Float32, 
CPU>[@model.layers.9.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=373), symbol:model.layers.9.self_attn.k_proj.weight])[symbol:model.layers.9.self_attn.k_proj.weight] - tensor.CPU.register () -> (%120:tensor<[1024, 2048], Float32, CPU>[@model.layers.9.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=375), symbol:model.layers.9.self_attn.v_proj.weight])[symbol:model.layers.9.self_attn.v_proj.weight] - tensor.CPU.register () -> (%140:tensor<[128], Float32, CPU>[@model.layers.9.self_attn.q_norm.weight][qnn_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=379), symbol:model.layers.9.self_attn.q_norm.weight])[symbol:model.layers.9.self_attn.q_norm.weight] - tensor.CPU.register () -> (%29:tensor<[128], Float32, CPU>[@model.layers.9.self_attn.k_norm.weight][qnn_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=381), symbol:model.layers.9.self_attn.k_norm.weight])[symbol:model.layers.9.self_attn.k_norm.weight] - tensor.CPU.register () -> (%205:tensor<[2048, 2048], Float32, CPU>[@model.layers.9.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=394), symbol:model.layers.9.self_attn.o_proj.weight])[symbol:model.layers.9.self_attn.o_proj.weight] - tensor.CPU.register () -> (%304:tensor<[2048], Float32, CPU>[@model.layers.9.post_attention_layernorm.weight][qnn_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=397), symbol:model.layers.9.post_attention_layernorm.weight])[symbol:model.layers.9.post_attention_layernorm.weight] - 
tensor.CPU.register () -> (%263:tensor<[6144, 2048], Float32, CPU>[@model.layers.9.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=398), symbol:model.layers.9.mlp.gate_proj.weight])[symbol:model.layers.9.mlp.gate_proj.weight] - tensor.CPU.register () -> (%102:tensor<[6144, 2048], Float32, CPU>[@model.layers.9.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=401), symbol:model.layers.9.mlp.up_proj.weight])[symbol:model.layers.9.mlp.up_proj.weight] - tensor.CPU.register () -> (%136:tensor<[2048, 6144], Float32, CPU>[@model.layers.9.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=403), symbol:model.layers.9.mlp.down_proj.weight])[symbol:model.layers.9.mlp.down_proj.weight] - tensor.CPU.register () -> (%186:tensor<[2048], Float32, CPU>[@model.layers.10.input_layernorm.weight][qnn_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=406), symbol:model.layers.10.input_layernorm.weight])[symbol:model.layers.10.input_layernorm.weight] - tensor.CPU.register () -> (%278:tensor<[2048, 2048], Float32, CPU>[@model.layers.10.self_attn.q_proj.weight][symbol:model.layers.10.self_attn.q_proj.weight])[symbol:model.layers.10.self_attn.q_proj.weight] - tensor.CPU.register () -> (%182:tensor<[1024, 2048], Float32, CPU>[@model.layers.10.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=407), 
symbol:model.layers.10.self_attn.k_proj.weight])[symbol:model.layers.10.self_attn.k_proj.weight] - tensor.CPU.register () -> (%138:tensor<[1024, 2048], Float32, CPU>[@model.layers.10.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=409), symbol:model.layers.10.self_attn.v_proj.weight])[symbol:model.layers.10.self_attn.v_proj.weight] - tensor.CPU.register () -> (%305:tensor<[128], Float32, CPU>[@model.layers.10.self_attn.q_norm.weight][qnn_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=413), symbol:model.layers.10.self_attn.q_norm.weight])[symbol:model.layers.10.self_attn.q_norm.weight] - tensor.CPU.register () -> (%272:tensor<[128], Float32, CPU>[@model.layers.10.self_attn.k_norm.weight][qnn_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=415), symbol:model.layers.10.self_attn.k_norm.weight])[symbol:model.layers.10.self_attn.k_norm.weight] - tensor.CPU.register () -> (%233:tensor<[2048, 2048], Float32, CPU>[@model.layers.10.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=428), symbol:model.layers.10.self_attn.o_proj.weight])[symbol:model.layers.10.self_attn.o_proj.weight] - tensor.CPU.register () -> (%266:tensor<[2048], Float32, CPU>[@model.layers.10.post_attention_layernorm.weight][qnn_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=431), symbol:model.layers.10.post_attention_layernorm.weight])[symbol:model.layers.10.post_attention_layernorm.weight] - tensor.CPU.register () -> (%124:tensor<[6144, 2048], Float32, CPU>[@model.layers.10.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, 
scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=432), symbol:model.layers.10.mlp.gate_proj.weight])[symbol:model.layers.10.mlp.gate_proj.weight] - tensor.CPU.register () -> (%261:tensor<[6144, 2048], Float32, CPU>[@model.layers.10.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=435), symbol:model.layers.10.mlp.up_proj.weight])[symbol:model.layers.10.mlp.up_proj.weight] - tensor.CPU.register () -> (%45:tensor<[2048, 6144], Float32, CPU>[@model.layers.10.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=437), symbol:model.layers.10.mlp.down_proj.weight])[symbol:model.layers.10.mlp.down_proj.weight] - tensor.CPU.register () -> (%219:tensor<[2048], Float32, CPU>[@model.layers.11.input_layernorm.weight][qnn_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=440), symbol:model.layers.11.input_layernorm.weight])[symbol:model.layers.11.input_layernorm.weight] - tensor.CPU.register () -> (%274:tensor<[2048, 2048], Float32, CPU>[@model.layers.11.self_attn.q_proj.weight][symbol:model.layers.11.self_attn.q_proj.weight])[symbol:model.layers.11.self_attn.q_proj.weight] - tensor.CPU.register () -> (%157:tensor<[1024, 2048], Float32, CPU>[@model.layers.11.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=441), symbol:model.layers.11.self_attn.k_proj.weight])[symbol:model.layers.11.self_attn.k_proj.weight] - tensor.CPU.register () -> (%63:tensor<[1024, 2048], Float32, CPU>[@model.layers.11.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, 
ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=443), symbol:model.layers.11.self_attn.v_proj.weight])[symbol:model.layers.11.self_attn.v_proj.weight] - tensor.CPU.register () -> (%214:tensor<[128], Float32, CPU>[@model.layers.11.self_attn.q_norm.weight][qnn_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=447), symbol:model.layers.11.self_attn.q_norm.weight])[symbol:model.layers.11.self_attn.q_norm.weight] - tensor.CPU.register () -> (%201:tensor<[128], Float32, CPU>[@model.layers.11.self_attn.k_norm.weight][qnn_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=449), symbol:model.layers.11.self_attn.k_norm.weight])[symbol:model.layers.11.self_attn.k_norm.weight] - tensor.CPU.register () -> (%118:tensor<[2048, 2048], Float32, CPU>[@model.layers.11.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=462), symbol:model.layers.11.self_attn.o_proj.weight])[symbol:model.layers.11.self_attn.o_proj.weight] - tensor.CPU.register () -> (%151:tensor<[2048], Float32, CPU>[@model.layers.11.post_attention_layernorm.weight][qnn_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=465), symbol:model.layers.11.post_attention_layernorm.weight])[symbol:model.layers.11.post_attention_layernorm.weight] - tensor.CPU.register () -> (%207:tensor<[6144, 2048], Float32, CPU>[@model.layers.11.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=466), symbol:model.layers.11.mlp.gate_proj.weight])[symbol:model.layers.11.mlp.gate_proj.weight] - tensor.CPU.register () -> (%226:tensor<[6144, 2048], Float32, 
CPU>[@model.layers.11.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=469), symbol:model.layers.11.mlp.up_proj.weight])[symbol:model.layers.11.mlp.up_proj.weight] - tensor.CPU.register () -> (%224:tensor<[2048, 6144], Float32, CPU>[@model.layers.11.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=471), symbol:model.layers.11.mlp.down_proj.weight])[symbol:model.layers.11.mlp.down_proj.weight] - tensor.CPU.register () -> (%55:tensor<[2048], Float32, CPU>[@model.layers.12.input_layernorm.weight][qnn_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=474), symbol:model.layers.12.input_layernorm.weight])[symbol:model.layers.12.input_layernorm.weight] - tensor.CPU.register () -> (%217:tensor<[2048, 2048], Float32, CPU>[@model.layers.12.self_attn.q_proj.weight][symbol:model.layers.12.self_attn.q_proj.weight])[symbol:model.layers.12.self_attn.q_proj.weight] - tensor.CPU.register () -> (%297:tensor<[1024, 2048], Float32, CPU>[@model.layers.12.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=475), symbol:model.layers.12.self_attn.k_proj.weight])[symbol:model.layers.12.self_attn.k_proj.weight] - tensor.CPU.register () -> (%94:tensor<[1024, 2048], Float32, CPU>[@model.layers.12.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=477), symbol:model.layers.12.self_attn.v_proj.weight])[symbol:model.layers.12.self_attn.v_proj.weight] - tensor.CPU.register () -> 
(%161:tensor<[128], Float32, CPU>[@model.layers.12.self_attn.q_norm.weight][qnn_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=481), symbol:model.layers.12.self_attn.q_norm.weight])[symbol:model.layers.12.self_attn.q_norm.weight] - tensor.CPU.register () -> (%277:tensor<[128], Float32, CPU>[@model.layers.12.self_attn.k_norm.weight][qnn_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=483), symbol:model.layers.12.self_attn.k_norm.weight])[symbol:model.layers.12.self_attn.k_norm.weight] - tensor.CPU.register () -> (%49:tensor<[2048, 2048], Float32, CPU>[@model.layers.12.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=496), symbol:model.layers.12.self_attn.o_proj.weight])[symbol:model.layers.12.self_attn.o_proj.weight] - tensor.CPU.register () -> (%14:tensor<[2048], Float32, CPU>[@model.layers.12.post_attention_layernorm.weight][qnn_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=499), symbol:model.layers.12.post_attention_layernorm.weight])[symbol:model.layers.12.post_attention_layernorm.weight] - tensor.CPU.register () -> (%262:tensor<[6144, 2048], Float32, CPU>[@model.layers.12.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=500), symbol:model.layers.12.mlp.gate_proj.weight])[symbol:model.layers.12.mlp.gate_proj.weight] - tensor.CPU.register () -> (%255:tensor<[6144, 2048], Float32, CPU>[@model.layers.12.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=503), 
symbol:model.layers.12.mlp.up_proj.weight])[symbol:model.layers.12.mlp.up_proj.weight] - tensor.CPU.register () -> (%22:tensor<[2048, 6144], Float32, CPU>[@model.layers.12.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=505), symbol:model.layers.12.mlp.down_proj.weight])[symbol:model.layers.12.mlp.down_proj.weight] - tensor.CPU.register () -> (%212:tensor<[2048], Float32, CPU>[@model.layers.13.input_layernorm.weight][qnn_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=508), symbol:model.layers.13.input_layernorm.weight])[symbol:model.layers.13.input_layernorm.weight] - tensor.CPU.register () -> (%114:tensor<[2048, 2048], Float32, CPU>[@model.layers.13.self_attn.q_proj.weight][symbol:model.layers.13.self_attn.q_proj.weight])[symbol:model.layers.13.self_attn.q_proj.weight] - tensor.CPU.register () -> (%152:tensor<[1024, 2048], Float32, CPU>[@model.layers.13.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=509), symbol:model.layers.13.self_attn.k_proj.weight])[symbol:model.layers.13.self_attn.k_proj.weight] - tensor.CPU.register () -> (%15:tensor<[1024, 2048], Float32, CPU>[@model.layers.13.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=511), symbol:model.layers.13.self_attn.v_proj.weight])[symbol:model.layers.13.self_attn.v_proj.weight] - tensor.CPU.register () -> (%307:tensor<[128], Float32, CPU>[@model.layers.13.self_attn.q_norm.weight][qnn_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=515), 
symbol:model.layers.13.self_attn.q_norm.weight])[symbol:model.layers.13.self_attn.q_norm.weight] - tensor.CPU.register () -> (%30:tensor<[128], Float32, CPU>[@model.layers.13.self_attn.k_norm.weight][qnn_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=517), symbol:model.layers.13.self_attn.k_norm.weight])[symbol:model.layers.13.self_attn.k_norm.weight] - tensor.CPU.register () -> (%250:tensor<[2048, 2048], Float32, CPU>[@model.layers.13.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=530), symbol:model.layers.13.self_attn.o_proj.weight])[symbol:model.layers.13.self_attn.o_proj.weight] - tensor.CPU.register () -> (%160:tensor<[2048], Float32, CPU>[@model.layers.13.post_attention_layernorm.weight][qnn_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=533), symbol:model.layers.13.post_attention_layernorm.weight])[symbol:model.layers.13.post_attention_layernorm.weight] - tensor.CPU.register () -> (%247:tensor<[6144, 2048], Float32, CPU>[@model.layers.13.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=534), symbol:model.layers.13.mlp.gate_proj.weight])[symbol:model.layers.13.mlp.gate_proj.weight] - tensor.CPU.register () -> (%98:tensor<[6144, 2048], Float32, CPU>[@model.layers.13.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=537), symbol:model.layers.13.mlp.up_proj.weight])[symbol:model.layers.13.mlp.up_proj.weight] - tensor.CPU.register () -> (%193:tensor<[2048, 6144], Float32, 
CPU>[@model.layers.13.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=539), symbol:model.layers.13.mlp.down_proj.weight])[symbol:model.layers.13.mlp.down_proj.weight] - tensor.CPU.register () -> (%246:tensor<[2048], Float32, CPU>[@model.layers.14.input_layernorm.weight][qnn_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=542), symbol:model.layers.14.input_layernorm.weight])[symbol:model.layers.14.input_layernorm.weight] - tensor.CPU.register () -> (%209:tensor<[2048, 2048], Float32, CPU>[@model.layers.14.self_attn.q_proj.weight][symbol:model.layers.14.self_attn.q_proj.weight])[symbol:model.layers.14.self_attn.q_proj.weight] - tensor.CPU.register () -> (%38:tensor<[1024, 2048], Float32, CPU>[@model.layers.14.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=543), symbol:model.layers.14.self_attn.k_proj.weight])[symbol:model.layers.14.self_attn.k_proj.weight] - tensor.CPU.register () -> (%232:tensor<[1024, 2048], Float32, CPU>[@model.layers.14.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=545), symbol:model.layers.14.self_attn.v_proj.weight])[symbol:model.layers.14.self_attn.v_proj.weight] - tensor.CPU.register () -> (%0:tensor<[128], Float32, CPU>[@model.layers.14.self_attn.q_norm.weight][qnn_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=549), symbol:model.layers.14.self_attn.q_norm.weight])[symbol:model.layers.14.self_attn.q_norm.weight] - tensor.CPU.register () -> (%57:tensor<[128], Float32, CPU>[@model.layers.14.self_attn.k_norm.weight][qnn_recipe:QuantSpec(SymPerTensor(quant_min: 
-32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=551), symbol:model.layers.14.self_attn.k_norm.weight])[symbol:model.layers.14.self_attn.k_norm.weight] - tensor.CPU.register () -> (%168:tensor<[2048, 2048], Float32, CPU>[@model.layers.14.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=564), symbol:model.layers.14.self_attn.o_proj.weight])[symbol:model.layers.14.self_attn.o_proj.weight] - tensor.CPU.register () -> (%75:tensor<[2048], Float32, CPU>[@model.layers.14.post_attention_layernorm.weight][qnn_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=567), symbol:model.layers.14.post_attention_layernorm.weight])[symbol:model.layers.14.post_attention_layernorm.weight] - tensor.CPU.register () -> (%37:tensor<[6144, 2048], Float32, CPU>[@model.layers.14.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=568), symbol:model.layers.14.mlp.gate_proj.weight])[symbol:model.layers.14.mlp.gate_proj.weight] - tensor.CPU.register () -> (%147:tensor<[6144, 2048], Float32, CPU>[@model.layers.14.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=571), symbol:model.layers.14.mlp.up_proj.weight])[symbol:model.layers.14.mlp.up_proj.weight] - tensor.CPU.register () -> (%163:tensor<[2048, 6144], Float32, CPU>[@model.layers.14.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=573), 
symbol:model.layers.14.mlp.down_proj.weight])[symbol:model.layers.14.mlp.down_proj.weight] - tensor.CPU.register () -> (%67:tensor<[2048], Float32, CPU>[@model.layers.15.input_layernorm.weight][qnn_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=576), symbol:model.layers.15.input_layernorm.weight])[symbol:model.layers.15.input_layernorm.weight] - tensor.CPU.register () -> (%46:tensor<[2048, 2048], Float32, CPU>[@model.layers.15.self_attn.q_proj.weight][symbol:model.layers.15.self_attn.q_proj.weight])[symbol:model.layers.15.self_attn.q_proj.weight] - tensor.CPU.register () -> (%268:tensor<[1024, 2048], Float32, CPU>[@model.layers.15.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=577), symbol:model.layers.15.self_attn.k_proj.weight])[symbol:model.layers.15.self_attn.k_proj.weight] - tensor.CPU.register () -> (%117:tensor<[1024, 2048], Float32, CPU>[@model.layers.15.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=579), symbol:model.layers.15.self_attn.v_proj.weight])[symbol:model.layers.15.self_attn.v_proj.weight] - tensor.CPU.register () -> (%213:tensor<[128], Float32, CPU>[@model.layers.15.self_attn.q_norm.weight][qnn_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=583), symbol:model.layers.15.self_attn.q_norm.weight])[symbol:model.layers.15.self_attn.q_norm.weight] - tensor.CPU.register () -> (%100:tensor<[128], Float32, CPU>[@model.layers.15.self_attn.k_norm.weight][qnn_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=585), symbol:model.layers.15.self_attn.k_norm.weight])[symbol:model.layers.15.self_attn.k_norm.weight] - tensor.CPU.register () -> 
(%303:tensor<[2048, 2048], Float32, CPU>[@model.layers.15.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=598), symbol:model.layers.15.self_attn.o_proj.weight])[symbol:model.layers.15.self_attn.o_proj.weight] - tensor.CPU.register () -> (%167:tensor<[2048], Float32, CPU>[@model.layers.15.post_attention_layernorm.weight][qnn_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=601), symbol:model.layers.15.post_attention_layernorm.weight])[symbol:model.layers.15.post_attention_layernorm.weight] - tensor.CPU.register () -> (%260:tensor<[6144, 2048], Float32, CPU>[@model.layers.15.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=602), symbol:model.layers.15.mlp.gate_proj.weight])[symbol:model.layers.15.mlp.gate_proj.weight] - tensor.CPU.register () -> (%42:tensor<[6144, 2048], Float32, CPU>[@model.layers.15.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=605), symbol:model.layers.15.mlp.up_proj.weight])[symbol:model.layers.15.mlp.up_proj.weight] - tensor.CPU.register () -> (%290:tensor<[2048, 6144], Float32, CPU>[@model.layers.15.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=607), symbol:model.layers.15.mlp.down_proj.weight])[symbol:model.layers.15.mlp.down_proj.weight] - tensor.CPU.register () -> (%93:tensor<[2048], Float32, CPU>[@model.layers.16.input_layernorm.weight][qnn_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, 
scale_type: Float32), uuid=610), symbol:model.layers.16.input_layernorm.weight])[symbol:model.layers.16.input_layernorm.weight] - tensor.CPU.register () -> (%17:tensor<[2048, 2048], Float32, CPU>[@model.layers.16.self_attn.q_proj.weight][symbol:model.layers.16.self_attn.q_proj.weight])[symbol:model.layers.16.self_attn.q_proj.weight] - tensor.CPU.register () -> (%228:tensor<[1024, 2048], Float32, CPU>[@model.layers.16.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=611), symbol:model.layers.16.self_attn.k_proj.weight])[symbol:model.layers.16.self_attn.k_proj.weight] - tensor.CPU.register () -> (%66:tensor<[1024, 2048], Float32, CPU>[@model.layers.16.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=613), symbol:model.layers.16.self_attn.v_proj.weight])[symbol:model.layers.16.self_attn.v_proj.weight] - tensor.CPU.register () -> (%240:tensor<[128], Float32, CPU>[@model.layers.16.self_attn.q_norm.weight][qnn_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=617), symbol:model.layers.16.self_attn.q_norm.weight])[symbol:model.layers.16.self_attn.q_norm.weight] - tensor.CPU.register () -> (%306:tensor<[128], Float32, CPU>[@model.layers.16.self_attn.k_norm.weight][qnn_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=619), symbol:model.layers.16.self_attn.k_norm.weight])[symbol:model.layers.16.self_attn.k_norm.weight] - tensor.CPU.register () -> (%211:tensor<[2048, 2048], Float32, CPU>[@model.layers.16.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=632), 
symbol:model.layers.16.self_attn.o_proj.weight])[symbol:model.layers.16.self_attn.o_proj.weight] - tensor.CPU.register () -> (%210:tensor<[2048], Float32, CPU>[@model.layers.16.post_attention_layernorm.weight][qnn_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=635), symbol:model.layers.16.post_attention_layernorm.weight])[symbol:model.layers.16.post_attention_layernorm.weight] - tensor.CPU.register () -> (%130:tensor<[6144, 2048], Float32, CPU>[@model.layers.16.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=636), symbol:model.layers.16.mlp.gate_proj.weight])[symbol:model.layers.16.mlp.gate_proj.weight] - tensor.CPU.register () -> (%79:tensor<[6144, 2048], Float32, CPU>[@model.layers.16.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=639), symbol:model.layers.16.mlp.up_proj.weight])[symbol:model.layers.16.mlp.up_proj.weight] - tensor.CPU.register () -> (%248:tensor<[2048, 6144], Float32, CPU>[@model.layers.16.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=641), symbol:model.layers.16.mlp.down_proj.weight])[symbol:model.layers.16.mlp.down_proj.weight] - tensor.CPU.register () -> (%231:tensor<[2048], Float32, CPU>[@model.layers.17.input_layernorm.weight][qnn_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=644), symbol:model.layers.17.input_layernorm.weight])[symbol:model.layers.17.input_layernorm.weight] - tensor.CPU.register () -> (%64:tensor<[2048, 2048], Float32, 
CPU>[@model.layers.17.self_attn.q_proj.weight][symbol:model.layers.17.self_attn.q_proj.weight])[symbol:model.layers.17.self_attn.q_proj.weight] - tensor.CPU.register () -> (%237:tensor<[1024, 2048], Float32, CPU>[@model.layers.17.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=645), symbol:model.layers.17.self_attn.k_proj.weight])[symbol:model.layers.17.self_attn.k_proj.weight] - tensor.CPU.register () -> (%6:tensor<[1024, 2048], Float32, CPU>[@model.layers.17.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=647), symbol:model.layers.17.self_attn.v_proj.weight])[symbol:model.layers.17.self_attn.v_proj.weight] - tensor.CPU.register () -> (%222:tensor<[128], Float32, CPU>[@model.layers.17.self_attn.q_norm.weight][qnn_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=651), symbol:model.layers.17.self_attn.q_norm.weight])[symbol:model.layers.17.self_attn.q_norm.weight] - tensor.CPU.register () -> (%191:tensor<[128], Float32, CPU>[@model.layers.17.self_attn.k_norm.weight][qnn_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=653), symbol:model.layers.17.self_attn.k_norm.weight])[symbol:model.layers.17.self_attn.k_norm.weight] - tensor.CPU.register () -> (%125:tensor<[2048, 2048], Float32, CPU>[@model.layers.17.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=666), symbol:model.layers.17.self_attn.o_proj.weight])[symbol:model.layers.17.self_attn.o_proj.weight] - tensor.CPU.register () -> (%242:tensor<[2048], Float32, 
CPU>[@model.layers.17.post_attention_layernorm.weight][qnn_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=669), symbol:model.layers.17.post_attention_layernorm.weight])[symbol:model.layers.17.post_attention_layernorm.weight] - tensor.CPU.register () -> (%177:tensor<[6144, 2048], Float32, CPU>[@model.layers.17.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=670), symbol:model.layers.17.mlp.gate_proj.weight])[symbol:model.layers.17.mlp.gate_proj.weight] - tensor.CPU.register () -> (%26:tensor<[6144, 2048], Float32, CPU>[@model.layers.17.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=673), symbol:model.layers.17.mlp.up_proj.weight])[symbol:model.layers.17.mlp.up_proj.weight] - tensor.CPU.register () -> (%25:tensor<[2048, 6144], Float32, CPU>[@model.layers.17.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=675), symbol:model.layers.17.mlp.down_proj.weight])[symbol:model.layers.17.mlp.down_proj.weight] - tensor.CPU.register () -> (%296:tensor<[2048], Float32, CPU>[@model.layers.18.input_layernorm.weight][qnn_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=678), symbol:model.layers.18.input_layernorm.weight])[symbol:model.layers.18.input_layernorm.weight] - tensor.CPU.register () -> (%273:tensor<[2048, 2048], Float32, CPU>[@model.layers.18.self_attn.q_proj.weight][symbol:model.layers.18.self_attn.q_proj.weight])[symbol:model.layers.18.self_attn.q_proj.weight] - tensor.CPU.register () -> (%284:tensor<[1024, 2048], Float32, 
CPU>[@model.layers.18.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=679), symbol:model.layers.18.self_attn.k_proj.weight])[symbol:model.layers.18.self_attn.k_proj.weight] - tensor.CPU.register () -> (%18:tensor<[1024, 2048], Float32, CPU>[@model.layers.18.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=681), symbol:model.layers.18.self_attn.v_proj.weight])[symbol:model.layers.18.self_attn.v_proj.weight] - tensor.CPU.register () -> (%51:tensor<[128], Float32, CPU>[@model.layers.18.self_attn.q_norm.weight][qnn_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=685), symbol:model.layers.18.self_attn.q_norm.weight])[symbol:model.layers.18.self_attn.q_norm.weight] - tensor.CPU.register () -> (%21:tensor<[128], Float32, CPU>[@model.layers.18.self_attn.k_norm.weight][qnn_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=687), symbol:model.layers.18.self_attn.k_norm.weight])[symbol:model.layers.18.self_attn.k_norm.weight] - tensor.CPU.register () -> (%2:tensor<[2048, 2048], Float32, CPU>[@model.layers.18.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=700), symbol:model.layers.18.self_attn.o_proj.weight])[symbol:model.layers.18.self_attn.o_proj.weight] - tensor.CPU.register () -> (%10:tensor<[2048], Float32, CPU>[@model.layers.18.post_attention_layernorm.weight][qnn_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=703), 
symbol:model.layers.18.post_attention_layernorm.weight])[symbol:model.layers.18.post_attention_layernorm.weight] - tensor.CPU.register () -> (%166:tensor<[6144, 2048], Float32, CPU>[@model.layers.18.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=704), symbol:model.layers.18.mlp.gate_proj.weight])[symbol:model.layers.18.mlp.gate_proj.weight] - tensor.CPU.register () -> (%271:tensor<[6144, 2048], Float32, CPU>[@model.layers.18.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=707), symbol:model.layers.18.mlp.up_proj.weight])[symbol:model.layers.18.mlp.up_proj.weight] - tensor.CPU.register () -> (%112:tensor<[2048, 6144], Float32, CPU>[@model.layers.18.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=709), symbol:model.layers.18.mlp.down_proj.weight])[symbol:model.layers.18.mlp.down_proj.weight] - tensor.CPU.register () -> (%113:tensor<[2048], Float32, CPU>[@model.layers.19.input_layernorm.weight][qnn_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=712), symbol:model.layers.19.input_layernorm.weight])[symbol:model.layers.19.input_layernorm.weight] - tensor.CPU.register () -> (%8:tensor<[2048, 2048], Float32, CPU>[@model.layers.19.self_attn.q_proj.weight][symbol:model.layers.19.self_attn.q_proj.weight])[symbol:model.layers.19.self_attn.q_proj.weight] - tensor.CPU.register () -> (%286:tensor<[1024, 2048], Float32, CPU>[@model.layers.19.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, 
scale_1_type: Float32), uuid=713), symbol:model.layers.19.self_attn.k_proj.weight])[symbol:model.layers.19.self_attn.k_proj.weight] - tensor.CPU.register () -> (%50:tensor<[1024, 2048], Float32, CPU>[@model.layers.19.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=715), symbol:model.layers.19.self_attn.v_proj.weight])[symbol:model.layers.19.self_attn.v_proj.weight] - tensor.CPU.register () -> (%116:tensor<[128], Float32, CPU>[@model.layers.19.self_attn.q_norm.weight][qnn_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=719), symbol:model.layers.19.self_attn.q_norm.weight])[symbol:model.layers.19.self_attn.q_norm.weight] - tensor.CPU.register () -> (%84:tensor<[128], Float32, CPU>[@model.layers.19.self_attn.k_norm.weight][qnn_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=721), symbol:model.layers.19.self_attn.k_norm.weight])[symbol:model.layers.19.self_attn.k_norm.weight] - tensor.CPU.register () -> (%58:tensor<[2048, 2048], Float32, CPU>[@model.layers.19.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=734), symbol:model.layers.19.self_attn.o_proj.weight])[symbol:model.layers.19.self_attn.o_proj.weight] - tensor.CPU.register () -> (%95:tensor<[2048], Float32, CPU>[@model.layers.19.post_attention_layernorm.weight][qnn_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=737), symbol:model.layers.19.post_attention_layernorm.weight])[symbol:model.layers.19.post_attention_layernorm.weight] - tensor.CPU.register () -> (%281:tensor<[6144, 2048], Float32, CPU>[@model.layers.19.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 
32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=738), symbol:model.layers.19.mlp.gate_proj.weight])[symbol:model.layers.19.mlp.gate_proj.weight] - tensor.CPU.register () -> (%82:tensor<[6144, 2048], Float32, CPU>[@model.layers.19.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=741), symbol:model.layers.19.mlp.up_proj.weight])[symbol:model.layers.19.mlp.up_proj.weight] - tensor.CPU.register () -> (%173:tensor<[2048, 6144], Float32, CPU>[@model.layers.19.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=743), symbol:model.layers.19.mlp.down_proj.weight])[symbol:model.layers.19.mlp.down_proj.weight] - tensor.CPU.register () -> (%203:tensor<[2048], Float32, CPU>[@model.layers.20.input_layernorm.weight][qnn_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=746), symbol:model.layers.20.input_layernorm.weight])[symbol:model.layers.20.input_layernorm.weight] - tensor.CPU.register () -> (%280:tensor<[2048, 2048], Float32, CPU>[@model.layers.20.self_attn.q_proj.weight][symbol:model.layers.20.self_attn.q_proj.weight])[symbol:model.layers.20.self_attn.q_proj.weight] - tensor.CPU.register () -> (%253:tensor<[1024, 2048], Float32, CPU>[@model.layers.20.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=747), symbol:model.layers.20.self_attn.k_proj.weight])[symbol:model.layers.20.self_attn.k_proj.weight] - tensor.CPU.register () -> (%239:tensor<[1024, 2048], Float32, CPU>[@model.layers.20.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 
7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=749), symbol:model.layers.20.self_attn.v_proj.weight])[symbol:model.layers.20.self_attn.v_proj.weight] - tensor.CPU.register () -> (%143:tensor<[128], Float32, CPU>[@model.layers.20.self_attn.q_norm.weight][qnn_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=753), symbol:model.layers.20.self_attn.q_norm.weight])[symbol:model.layers.20.self_attn.q_norm.weight] - tensor.CPU.register () -> (%288:tensor<[128], Float32, CPU>[@model.layers.20.self_attn.k_norm.weight][qnn_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=755), symbol:model.layers.20.self_attn.k_norm.weight])[symbol:model.layers.20.self_attn.k_norm.weight] - tensor.CPU.register () -> (%41:tensor<[2048, 2048], Float32, CPU>[@model.layers.20.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=768), symbol:model.layers.20.self_attn.o_proj.weight])[symbol:model.layers.20.self_attn.o_proj.weight] - tensor.CPU.register () -> (%216:tensor<[2048], Float32, CPU>[@model.layers.20.post_attention_layernorm.weight][qnn_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=771), symbol:model.layers.20.post_attention_layernorm.weight])[symbol:model.layers.20.post_attention_layernorm.weight] - tensor.CPU.register () -> (%172:tensor<[6144, 2048], Float32, CPU>[@model.layers.20.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=772), symbol:model.layers.20.mlp.gate_proj.weight])[symbol:model.layers.20.mlp.gate_proj.weight] - tensor.CPU.register () -> (%299:tensor<[6144, 2048], Float32, 
CPU>[@model.layers.20.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=775), symbol:model.layers.20.mlp.up_proj.weight])[symbol:model.layers.20.mlp.up_proj.weight] - tensor.CPU.register () -> (%123:tensor<[2048, 6144], Float32, CPU>[@model.layers.20.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=777), symbol:model.layers.20.mlp.down_proj.weight])[symbol:model.layers.20.mlp.down_proj.weight] - tensor.CPU.register () -> (%229:tensor<[2048], Float32, CPU>[@model.layers.21.input_layernorm.weight][qnn_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=780), symbol:model.layers.21.input_layernorm.weight])[symbol:model.layers.21.input_layernorm.weight] - tensor.CPU.register () -> (%295:tensor<[2048, 2048], Float32, CPU>[@model.layers.21.self_attn.q_proj.weight][symbol:model.layers.21.self_attn.q_proj.weight])[symbol:model.layers.21.self_attn.q_proj.weight] - tensor.CPU.register () -> (%139:tensor<[1024, 2048], Float32, CPU>[@model.layers.21.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=781), symbol:model.layers.21.self_attn.k_proj.weight])[symbol:model.layers.21.self_attn.k_proj.weight] - tensor.CPU.register () -> (%142:tensor<[1024, 2048], Float32, CPU>[@model.layers.21.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=783), symbol:model.layers.21.self_attn.v_proj.weight])[symbol:model.layers.21.self_attn.v_proj.weight] - tensor.CPU.register () -> 
(%87:tensor<[128], Float32, CPU>[@model.layers.21.self_attn.q_norm.weight][qnn_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=787), symbol:model.layers.21.self_attn.q_norm.weight])[symbol:model.layers.21.self_attn.q_norm.weight] - tensor.CPU.register () -> (%56:tensor<[128], Float32, CPU>[@model.layers.21.self_attn.k_norm.weight][qnn_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=789), symbol:model.layers.21.self_attn.k_norm.weight])[symbol:model.layers.21.self_attn.k_norm.weight] - tensor.CPU.register () -> (%115:tensor<[2048, 2048], Float32, CPU>[@model.layers.21.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=802), symbol:model.layers.21.self_attn.o_proj.weight])[symbol:model.layers.21.self_attn.o_proj.weight] - tensor.CPU.register () -> (%174:tensor<[2048], Float32, CPU>[@model.layers.21.post_attention_layernorm.weight][qnn_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=805), symbol:model.layers.21.post_attention_layernorm.weight])[symbol:model.layers.21.post_attention_layernorm.weight] - tensor.CPU.register () -> (%259:tensor<[6144, 2048], Float32, CPU>[@model.layers.21.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=806), symbol:model.layers.21.mlp.gate_proj.weight])[symbol:model.layers.21.mlp.gate_proj.weight] - tensor.CPU.register () -> (%162:tensor<[6144, 2048], Float32, CPU>[@model.layers.21.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=809), 
symbol:model.layers.21.mlp.up_proj.weight])[symbol:model.layers.21.mlp.up_proj.weight] - tensor.CPU.register () -> (%183:tensor<[2048, 6144], Float32, CPU>[@model.layers.21.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=811), symbol:model.layers.21.mlp.down_proj.weight])[symbol:model.layers.21.mlp.down_proj.weight] - tensor.CPU.register () -> (%257:tensor<[2048], Float32, CPU>[@model.layers.22.input_layernorm.weight][qnn_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=814), symbol:model.layers.22.input_layernorm.weight])[symbol:model.layers.22.input_layernorm.weight] - tensor.CPU.register () -> (%89:tensor<[2048, 2048], Float32, CPU>[@model.layers.22.self_attn.q_proj.weight][symbol:model.layers.22.self_attn.q_proj.weight])[symbol:model.layers.22.self_attn.q_proj.weight] - tensor.CPU.register () -> (%36:tensor<[1024, 2048], Float32, CPU>[@model.layers.22.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=815), symbol:model.layers.22.self_attn.k_proj.weight])[symbol:model.layers.22.self_attn.k_proj.weight] - tensor.CPU.register () -> (%204:tensor<[1024, 2048], Float32, CPU>[@model.layers.22.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=817), symbol:model.layers.22.self_attn.v_proj.weight])[symbol:model.layers.22.self_attn.v_proj.weight] - tensor.CPU.register () -> (%158:tensor<[128], Float32, CPU>[@model.layers.22.self_attn.q_norm.weight][qnn_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=821), 
symbol:model.layers.22.self_attn.q_norm.weight])[symbol:model.layers.22.self_attn.q_norm.weight] - tensor.CPU.register () -> (%215:tensor<[128], Float32, CPU>[@model.layers.22.self_attn.k_norm.weight][qnn_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=823), symbol:model.layers.22.self_attn.k_norm.weight])[symbol:model.layers.22.self_attn.k_norm.weight] - tensor.CPU.register () -> (%234:tensor<[2048, 2048], Float32, CPU>[@model.layers.22.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=836), symbol:model.layers.22.self_attn.o_proj.weight])[symbol:model.layers.22.self_attn.o_proj.weight] - tensor.CPU.register () -> (%270:tensor<[2048], Float32, CPU>[@model.layers.22.post_attention_layernorm.weight][qnn_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=839), symbol:model.layers.22.post_attention_layernorm.weight])[symbol:model.layers.22.post_attention_layernorm.weight] - tensor.CPU.register () -> (%198:tensor<[6144, 2048], Float32, CPU>[@model.layers.22.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=840), symbol:model.layers.22.mlp.gate_proj.weight])[symbol:model.layers.22.mlp.gate_proj.weight] - tensor.CPU.register () -> (%254:tensor<[6144, 2048], Float32, CPU>[@model.layers.22.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=843), symbol:model.layers.22.mlp.up_proj.weight])[symbol:model.layers.22.mlp.up_proj.weight] - tensor.CPU.register () -> (%31:tensor<[2048, 6144], Float32, 
CPU>[@model.layers.22.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=845), symbol:model.layers.22.mlp.down_proj.weight])[symbol:model.layers.22.mlp.down_proj.weight] - tensor.CPU.register () -> (%292:tensor<[2048], Float32, CPU>[@model.layers.23.input_layernorm.weight][qnn_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=848), symbol:model.layers.23.input_layernorm.weight])[symbol:model.layers.23.input_layernorm.weight] - tensor.CPU.register () -> (%109:tensor<[2048, 2048], Float32, CPU>[@model.layers.23.self_attn.q_proj.weight][symbol:model.layers.23.self_attn.q_proj.weight])[symbol:model.layers.23.self_attn.q_proj.weight] - tensor.CPU.register () -> (%39:tensor<[1024, 2048], Float32, CPU>[@model.layers.23.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=849), symbol:model.layers.23.self_attn.k_proj.weight])[symbol:model.layers.23.self_attn.k_proj.weight] - tensor.CPU.register () -> (%83:tensor<[1024, 2048], Float32, CPU>[@model.layers.23.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=851), symbol:model.layers.23.self_attn.v_proj.weight])[symbol:model.layers.23.self_attn.v_proj.weight] - tensor.CPU.register () -> (%293:tensor<[128], Float32, CPU>[@model.layers.23.self_attn.q_norm.weight][qnn_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=855), symbol:model.layers.23.self_attn.q_norm.weight])[symbol:model.layers.23.self_attn.q_norm.weight] - tensor.CPU.register () -> (%134:tensor<[128], Float32, 
CPU>[@model.layers.23.self_attn.k_norm.weight][qnn_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=857), symbol:model.layers.23.self_attn.k_norm.weight])[symbol:model.layers.23.self_attn.k_norm.weight] - tensor.CPU.register () -> (%176:tensor<[2048, 2048], Float32, CPU>[@model.layers.23.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=870), symbol:model.layers.23.self_attn.o_proj.weight])[symbol:model.layers.23.self_attn.o_proj.weight] - tensor.CPU.register () -> (%170:tensor<[2048], Float32, CPU>[@model.layers.23.post_attention_layernorm.weight][qnn_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=873), symbol:model.layers.23.post_attention_layernorm.weight])[symbol:model.layers.23.post_attention_layernorm.weight] - tensor.CPU.register () -> (%169:tensor<[6144, 2048], Float32, CPU>[@model.layers.23.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=874), symbol:model.layers.23.mlp.gate_proj.weight])[symbol:model.layers.23.mlp.gate_proj.weight] - tensor.CPU.register () -> (%243:tensor<[6144, 2048], Float32, CPU>[@model.layers.23.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=877), symbol:model.layers.23.mlp.up_proj.weight])[symbol:model.layers.23.mlp.up_proj.weight] - tensor.CPU.register () -> (%149:tensor<[2048, 6144], Float32, CPU>[@model.layers.23.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), 
uuid=879), symbol:model.layers.23.mlp.down_proj.weight])[symbol:model.layers.23.mlp.down_proj.weight] - tensor.CPU.register () -> (%13:tensor<[2048], Float32, CPU>[@model.layers.24.input_layernorm.weight][qnn_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=882), symbol:model.layers.24.input_layernorm.weight])[symbol:model.layers.24.input_layernorm.weight] - tensor.CPU.register () -> (%11:tensor<[2048, 2048], Float32, CPU>[@model.layers.24.self_attn.q_proj.weight][symbol:model.layers.24.self_attn.q_proj.weight])[symbol:model.layers.24.self_attn.q_proj.weight] - tensor.CPU.register () -> (%61:tensor<[1024, 2048], Float32, CPU>[@model.layers.24.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=883), symbol:model.layers.24.self_attn.k_proj.weight])[symbol:model.layers.24.self_attn.k_proj.weight] - tensor.CPU.register () -> (%81:tensor<[1024, 2048], Float32, CPU>[@model.layers.24.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=885), symbol:model.layers.24.self_attn.v_proj.weight])[symbol:model.layers.24.self_attn.v_proj.weight] - tensor.CPU.register () -> (%90:tensor<[128], Float32, CPU>[@model.layers.24.self_attn.q_norm.weight][qnn_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=889), symbol:model.layers.24.self_attn.q_norm.weight])[symbol:model.layers.24.self_attn.q_norm.weight] - tensor.CPU.register () -> (%19:tensor<[128], Float32, CPU>[@model.layers.24.self_attn.k_norm.weight][qnn_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=891), symbol:model.layers.24.self_attn.k_norm.weight])[symbol:model.layers.24.self_attn.k_norm.weight] - tensor.CPU.register () -> 
(%127:tensor<[2048, 2048], Float32, CPU>[@model.layers.24.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=904), symbol:model.layers.24.self_attn.o_proj.weight])[symbol:model.layers.24.self_attn.o_proj.weight] - tensor.CPU.register () -> (%77:tensor<[2048], Float32, CPU>[@model.layers.24.post_attention_layernorm.weight][qnn_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=907), symbol:model.layers.24.post_attention_layernorm.weight])[symbol:model.layers.24.post_attention_layernorm.weight] - tensor.CPU.register () -> (%141:tensor<[6144, 2048], Float32, CPU>[@model.layers.24.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=908), symbol:model.layers.24.mlp.gate_proj.weight])[symbol:model.layers.24.mlp.gate_proj.weight] - tensor.CPU.register () -> (%126:tensor<[6144, 2048], Float32, CPU>[@model.layers.24.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=911), symbol:model.layers.24.mlp.up_proj.weight])[symbol:model.layers.24.mlp.up_proj.weight] - tensor.CPU.register () -> (%34:tensor<[2048, 6144], Float32, CPU>[@model.layers.24.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=913), symbol:model.layers.24.mlp.down_proj.weight])[symbol:model.layers.24.mlp.down_proj.weight] - tensor.CPU.register () -> (%196:tensor<[2048], Float32, CPU>[@model.layers.25.input_layernorm.weight][qnn_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, 
scale_type: Float32), uuid=916), symbol:model.layers.25.input_layernorm.weight])[symbol:model.layers.25.input_layernorm.weight] - tensor.CPU.register () -> (%206:tensor<[2048, 2048], Float32, CPU>[@model.layers.25.self_attn.q_proj.weight][symbol:model.layers.25.self_attn.q_proj.weight])[symbol:model.layers.25.self_attn.q_proj.weight] - tensor.CPU.register () -> (%27:tensor<[1024, 2048], Float32, CPU>[@model.layers.25.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=917), symbol:model.layers.25.self_attn.k_proj.weight])[symbol:model.layers.25.self_attn.k_proj.weight] - tensor.CPU.register () -> (%121:tensor<[1024, 2048], Float32, CPU>[@model.layers.25.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=919), symbol:model.layers.25.self_attn.v_proj.weight])[symbol:model.layers.25.self_attn.v_proj.weight] - tensor.CPU.register () -> (%310:tensor<[128], Float32, CPU>[@model.layers.25.self_attn.q_norm.weight][qnn_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=923), symbol:model.layers.25.self_attn.q_norm.weight])[symbol:model.layers.25.self_attn.q_norm.weight] - tensor.CPU.register () -> (%187:tensor<[128], Float32, CPU>[@model.layers.25.self_attn.k_norm.weight][qnn_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=925), symbol:model.layers.25.self_attn.k_norm.weight])[symbol:model.layers.25.self_attn.k_norm.weight] - tensor.CPU.register () -> (%150:tensor<[2048, 2048], Float32, CPU>[@model.layers.25.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=938), 
symbol:model.layers.25.self_attn.o_proj.weight])[symbol:model.layers.25.self_attn.o_proj.weight] - tensor.CPU.register () -> (%175:tensor<[2048], Float32, CPU>[@model.layers.25.post_attention_layernorm.weight][qnn_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=941), symbol:model.layers.25.post_attention_layernorm.weight])[symbol:model.layers.25.post_attention_layernorm.weight] - tensor.CPU.register () -> (%249:tensor<[6144, 2048], Float32, CPU>[@model.layers.25.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=942), symbol:model.layers.25.mlp.gate_proj.weight])[symbol:model.layers.25.mlp.gate_proj.weight] - tensor.CPU.register () -> (%159:tensor<[6144, 2048], Float32, CPU>[@model.layers.25.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=945), symbol:model.layers.25.mlp.up_proj.weight])[symbol:model.layers.25.mlp.up_proj.weight] - tensor.CPU.register () -> (%267:tensor<[2048, 6144], Float32, CPU>[@model.layers.25.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=947), symbol:model.layers.25.mlp.down_proj.weight])[symbol:model.layers.25.mlp.down_proj.weight] - tensor.CPU.register () -> (%302:tensor<[2048], Float32, CPU>[@model.layers.26.input_layernorm.weight][qnn_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=950), symbol:model.layers.26.input_layernorm.weight])[symbol:model.layers.26.input_layernorm.weight] - tensor.CPU.register () -> (%265:tensor<[2048, 2048], Float32, 
CPU>[@model.layers.26.self_attn.q_proj.weight][symbol:model.layers.26.self_attn.q_proj.weight])[symbol:model.layers.26.self_attn.q_proj.weight] - tensor.CPU.register () -> (%190:tensor<[1024, 2048], Float32, CPU>[@model.layers.26.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=951), symbol:model.layers.26.self_attn.k_proj.weight])[symbol:model.layers.26.self_attn.k_proj.weight] - tensor.CPU.register () -> (%119:tensor<[1024, 2048], Float32, CPU>[@model.layers.26.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=953), symbol:model.layers.26.self_attn.v_proj.weight])[symbol:model.layers.26.self_attn.v_proj.weight] - tensor.CPU.register () -> (%70:tensor<[128], Float32, CPU>[@model.layers.26.self_attn.q_norm.weight][qnn_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=957), symbol:model.layers.26.self_attn.q_norm.weight])[symbol:model.layers.26.self_attn.q_norm.weight] - tensor.CPU.register () -> (%35:tensor<[128], Float32, CPU>[@model.layers.26.self_attn.k_norm.weight][qnn_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=959), symbol:model.layers.26.self_attn.k_norm.weight])[symbol:model.layers.26.self_attn.k_norm.weight] - tensor.CPU.register () -> (%88:tensor<[2048, 2048], Float32, CPU>[@model.layers.26.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=972), symbol:model.layers.26.self_attn.o_proj.weight])[symbol:model.layers.26.self_attn.o_proj.weight] - tensor.CPU.register () -> (%298:tensor<[2048], Float32, 
CPU>[@model.layers.26.post_attention_layernorm.weight][qnn_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=975), symbol:model.layers.26.post_attention_layernorm.weight])[symbol:model.layers.26.post_attention_layernorm.weight] - tensor.CPU.register () -> (%96:tensor<[6144, 2048], Float32, CPU>[@model.layers.26.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=976), symbol:model.layers.26.mlp.gate_proj.weight])[symbol:model.layers.26.mlp.gate_proj.weight] - tensor.CPU.register () -> (%62:tensor<[6144, 2048], Float32, CPU>[@model.layers.26.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=979), symbol:model.layers.26.mlp.up_proj.weight])[symbol:model.layers.26.mlp.up_proj.weight] - tensor.CPU.register () -> (%220:tensor<[2048, 6144], Float32, CPU>[@model.layers.26.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=981), symbol:model.layers.26.mlp.down_proj.weight])[symbol:model.layers.26.mlp.down_proj.weight] - tensor.CPU.register () -> (%44:tensor<[2048], Float32, CPU>[@model.layers.27.input_layernorm.weight][qnn_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=984), symbol:model.layers.27.input_layernorm.weight])[symbol:model.layers.27.input_layernorm.weight] - tensor.CPU.register () -> (%185:tensor<[2048, 2048], Float32, CPU>[@model.layers.27.self_attn.q_proj.weight][symbol:model.layers.27.self_attn.q_proj.weight])[symbol:model.layers.27.self_attn.q_proj.weight] - tensor.CPU.register () -> (%12:tensor<[1024, 2048], Float32, 
CPU>[@model.layers.27.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=985), symbol:model.layers.27.self_attn.k_proj.weight])[symbol:model.layers.27.self_attn.k_proj.weight] - tensor.CPU.register () -> (%54:tensor<[1024, 2048], Float32, CPU>[@model.layers.27.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=987), symbol:model.layers.27.self_attn.v_proj.weight])[symbol:model.layers.27.self_attn.v_proj.weight] - tensor.CPU.register () -> (%192:tensor<[128], Float32, CPU>[@model.layers.27.self_attn.q_norm.weight][qnn_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=991), symbol:model.layers.27.self_attn.q_norm.weight])[symbol:model.layers.27.self_attn.q_norm.weight] - tensor.CPU.register () -> (%241:tensor<[128], Float32, CPU>[@model.layers.27.self_attn.k_norm.weight][qnn_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=993), symbol:model.layers.27.self_attn.k_norm.weight])[symbol:model.layers.27.self_attn.k_norm.weight] - tensor.CPU.register () -> (%60:tensor<[2048, 2048], Float32, CPU>[@model.layers.27.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1006), symbol:model.layers.27.self_attn.o_proj.weight])[symbol:model.layers.27.self_attn.o_proj.weight] - tensor.CPU.register () -> (%104:tensor<[2048], Float32, CPU>[@model.layers.27.post_attention_layernorm.weight][qnn_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1009), 
symbol:model.layers.27.post_attention_layernorm.weight])[symbol:model.layers.27.post_attention_layernorm.weight] - tensor.CPU.register () -> (%144:tensor<[6144, 2048], Float32, CPU>[@model.layers.27.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1010), symbol:model.layers.27.mlp.gate_proj.weight])[symbol:model.layers.27.mlp.gate_proj.weight] - tensor.CPU.register () -> (%146:tensor<[6144, 2048], Float32, CPU>[@model.layers.27.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1013), symbol:model.layers.27.mlp.up_proj.weight])[symbol:model.layers.27.mlp.up_proj.weight] - tensor.CPU.register () -> (%195:tensor<[2048, 6144], Float32, CPU>[@model.layers.27.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1015), symbol:model.layers.27.mlp.down_proj.weight])[symbol:model.layers.27.mlp.down_proj.weight] - tensor.CPU.register () -> (%5:tensor<[2048], Float32, CPU>[@model.norm.weight][qnn_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1018), symbol:model.norm.weight])[symbol:model.norm.weight] - tensor.CPU.register () -> (%101:tensor<[151936, 2048], Float32, CPU>[@lm_head.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1019), symbol:lm_head.weight])[symbol:lm_head.weight] + tensor.CPU.register () -> (%7516:tensor<[151936, 2048], Float32, CPU>[@model.embed_tokens.weight][quant_recipe:QuantSpec(Raw(type: Float32), uuid=61), 
symbol:model.embed_tokens.weight])[symbol:model.embed_tokens.weight] + tensor.CPU.register () -> (%8011:tensor<[1, 1024, 128], Int16PerTensor, CPU>[@rope_sin][symbol:rope_sin])[symbol:rope_sin] + tensor.CPU.register () -> (%8012:tensor<[1, 1024, 128], Int16PerTensor, CPU>[@rope_cos][symbol:rope_cos])[symbol:rope_cos] + tensor.CPU.register () -> (%6662:tensor<[2048], Float32, CPU>[@model.layers.0.input_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=67), symbol:model.layers.0.input_layernorm.weight])[symbol:model.layers.0.input_layernorm.weight] + tensor.CPU.register () -> (%7778:tensor<[2048, 2048], Float32, CPU>[@model.layers.0.self_attn.q_proj.weight][symbol:model.layers.0.self_attn.q_proj.weight])[symbol:model.layers.0.self_attn.q_proj.weight] + tensor.CPU.register () -> (%61:tensor<[1024, 2048], Float32, CPU>[@model.layers.0.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=68), symbol:model.layers.0.self_attn.k_proj.weight])[symbol:model.layers.0.self_attn.k_proj.weight] + tensor.CPU.register () -> (%5178:tensor<[1024, 2048], Float32, CPU>[@model.layers.0.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=70), symbol:model.layers.0.self_attn.v_proj.weight])[symbol:model.layers.0.self_attn.v_proj.weight] + tensor.CPU.register () -> (%1867:tensor<[128], Float32, CPU>[@model.layers.0.self_attn.q_norm.weight][quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=74), symbol:model.layers.0.self_attn.q_norm.weight])[symbol:model.layers.0.self_attn.q_norm.weight] + tensor.CPU.register () -> (%7469:tensor<[128], Float32, 
CPU>[@model.layers.0.self_attn.k_norm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=76), symbol:model.layers.0.self_attn.k_norm.weight])[symbol:model.layers.0.self_attn.k_norm.weight] + tensor.CPU.register () -> (%7880:tensor<[2048, 2048], Float32, CPU>[@model.layers.0.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=89), symbol:model.layers.0.self_attn.o_proj.weight])[symbol:model.layers.0.self_attn.o_proj.weight] + tensor.CPU.register () -> (%3163:tensor<[2048], Float32, CPU>[@model.layers.0.post_attention_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=92), symbol:model.layers.0.post_attention_layernorm.weight])[symbol:model.layers.0.post_attention_layernorm.weight] + tensor.CPU.register () -> (%3038:tensor<[6144, 2048], Float32, CPU>[@model.layers.0.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=93), symbol:model.layers.0.mlp.gate_proj.weight])[symbol:model.layers.0.mlp.gate_proj.weight] + tensor.CPU.register () -> (%184:tensor<[6144, 2048], Float32, CPU>[@model.layers.0.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=96), symbol:model.layers.0.mlp.up_proj.weight])[symbol:model.layers.0.mlp.up_proj.weight] + tensor.CPU.register () -> (%7449:tensor<[2048, 6144], Float32, CPU>[@model.layers.0.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=98), 
symbol:model.layers.0.mlp.down_proj.weight])[symbol:model.layers.0.mlp.down_proj.weight] + tensor.CPU.register () -> (%3526:tensor<[2048], Float32, CPU>[@model.layers.1.input_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=101), symbol:model.layers.1.input_layernorm.weight])[symbol:model.layers.1.input_layernorm.weight] + tensor.CPU.register () -> (%2471:tensor<[2048, 2048], Float32, CPU>[@model.layers.1.self_attn.q_proj.weight][symbol:model.layers.1.self_attn.q_proj.weight])[symbol:model.layers.1.self_attn.q_proj.weight] + tensor.CPU.register () -> (%5492:tensor<[1024, 2048], Float32, CPU>[@model.layers.1.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=102), symbol:model.layers.1.self_attn.k_proj.weight])[symbol:model.layers.1.self_attn.k_proj.weight] + tensor.CPU.register () -> (%554:tensor<[1024, 2048], Float32, CPU>[@model.layers.1.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=104), symbol:model.layers.1.self_attn.v_proj.weight])[symbol:model.layers.1.self_attn.v_proj.weight] + tensor.CPU.register () -> (%5159:tensor<[128], Float32, CPU>[@model.layers.1.self_attn.q_norm.weight][quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=108), symbol:model.layers.1.self_attn.q_norm.weight])[symbol:model.layers.1.self_attn.q_norm.weight] + tensor.CPU.register () -> (%6337:tensor<[128], Float32, CPU>[@model.layers.1.self_attn.k_norm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=110), symbol:model.layers.1.self_attn.k_norm.weight])[symbol:model.layers.1.self_attn.k_norm.weight] + tensor.CPU.register () -> 
(%3431:tensor<[2048, 2048], Float32, CPU>[@model.layers.1.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=123), symbol:model.layers.1.self_attn.o_proj.weight])[symbol:model.layers.1.self_attn.o_proj.weight] + tensor.CPU.register () -> (%7183:tensor<[2048], Float32, CPU>[@model.layers.1.post_attention_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=126), symbol:model.layers.1.post_attention_layernorm.weight])[symbol:model.layers.1.post_attention_layernorm.weight] + tensor.CPU.register () -> (%6960:tensor<[6144, 2048], Float32, CPU>[@model.layers.1.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=127), symbol:model.layers.1.mlp.gate_proj.weight])[symbol:model.layers.1.mlp.gate_proj.weight] + tensor.CPU.register () -> (%7251:tensor<[6144, 2048], Float32, CPU>[@model.layers.1.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=130), symbol:model.layers.1.mlp.up_proj.weight])[symbol:model.layers.1.mlp.up_proj.weight] + tensor.CPU.register () -> (%6256:tensor<[2048, 6144], Float32, CPU>[@model.layers.1.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=132), symbol:model.layers.1.mlp.down_proj.weight])[symbol:model.layers.1.mlp.down_proj.weight] + tensor.CPU.register () -> (%7411:tensor<[2048], Float32, CPU>[@model.layers.2.input_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, 
scale_type: Float32), uuid=135), symbol:model.layers.2.input_layernorm.weight])[symbol:model.layers.2.input_layernorm.weight] + tensor.CPU.register () -> (%4879:tensor<[2048, 2048], Float32, CPU>[@model.layers.2.self_attn.q_proj.weight][symbol:model.layers.2.self_attn.q_proj.weight])[symbol:model.layers.2.self_attn.q_proj.weight] + tensor.CPU.register () -> (%725:tensor<[1024, 2048], Float32, CPU>[@model.layers.2.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=136), symbol:model.layers.2.self_attn.k_proj.weight])[symbol:model.layers.2.self_attn.k_proj.weight] + tensor.CPU.register () -> (%2701:tensor<[1024, 2048], Float32, CPU>[@model.layers.2.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=138), symbol:model.layers.2.self_attn.v_proj.weight])[symbol:model.layers.2.self_attn.v_proj.weight] + tensor.CPU.register () -> (%7660:tensor<[128], Float32, CPU>[@model.layers.2.self_attn.q_norm.weight][quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=142), symbol:model.layers.2.self_attn.q_norm.weight])[symbol:model.layers.2.self_attn.q_norm.weight] + tensor.CPU.register () -> (%5749:tensor<[128], Float32, CPU>[@model.layers.2.self_attn.k_norm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=144), symbol:model.layers.2.self_attn.k_norm.weight])[symbol:model.layers.2.self_attn.k_norm.weight] + tensor.CPU.register () -> (%1525:tensor<[2048, 2048], Float32, CPU>[@model.layers.2.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=157), 
symbol:model.layers.2.self_attn.o_proj.weight])[symbol:model.layers.2.self_attn.o_proj.weight] + tensor.CPU.register () -> (%6444:tensor<[2048], Float32, CPU>[@model.layers.2.post_attention_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=160), symbol:model.layers.2.post_attention_layernorm.weight])[symbol:model.layers.2.post_attention_layernorm.weight] + tensor.CPU.register () -> (%3201:tensor<[6144, 2048], Float32, CPU>[@model.layers.2.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=161), symbol:model.layers.2.mlp.gate_proj.weight])[symbol:model.layers.2.mlp.gate_proj.weight] + tensor.CPU.register () -> (%4120:tensor<[6144, 2048], Float32, CPU>[@model.layers.2.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=164), symbol:model.layers.2.mlp.up_proj.weight])[symbol:model.layers.2.mlp.up_proj.weight] + tensor.CPU.register () -> (%1962:tensor<[2048, 6144], Float32, CPU>[@model.layers.2.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=166), symbol:model.layers.2.mlp.down_proj.weight])[symbol:model.layers.2.mlp.down_proj.weight] + tensor.CPU.register () -> (%3250:tensor<[2048], Float32, CPU>[@model.layers.3.input_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=169), symbol:model.layers.3.input_layernorm.weight])[symbol:model.layers.3.input_layernorm.weight] + tensor.CPU.register () -> (%5564:tensor<[2048, 2048], Float32, 
CPU>[@model.layers.3.self_attn.q_proj.weight][symbol:model.layers.3.self_attn.q_proj.weight])[symbol:model.layers.3.self_attn.q_proj.weight] + tensor.CPU.register () -> (%3502:tensor<[1024, 2048], Float32, CPU>[@model.layers.3.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=170), symbol:model.layers.3.self_attn.k_proj.weight])[symbol:model.layers.3.self_attn.k_proj.weight] + tensor.CPU.register () -> (%2402:tensor<[1024, 2048], Float32, CPU>[@model.layers.3.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=172), symbol:model.layers.3.self_attn.v_proj.weight])[symbol:model.layers.3.self_attn.v_proj.weight] + tensor.CPU.register () -> (%1747:tensor<[128], Float32, CPU>[@model.layers.3.self_attn.q_norm.weight][quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=176), symbol:model.layers.3.self_attn.q_norm.weight])[symbol:model.layers.3.self_attn.q_norm.weight] + tensor.CPU.register () -> (%4846:tensor<[128], Float32, CPU>[@model.layers.3.self_attn.k_norm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=178), symbol:model.layers.3.self_attn.k_norm.weight])[symbol:model.layers.3.self_attn.k_norm.weight] + tensor.CPU.register () -> (%3109:tensor<[2048, 2048], Float32, CPU>[@model.layers.3.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=191), symbol:model.layers.3.self_attn.o_proj.weight])[symbol:model.layers.3.self_attn.o_proj.weight] + tensor.CPU.register () -> (%7221:tensor<[2048], Float32, 
CPU>[@model.layers.3.post_attention_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=194), symbol:model.layers.3.post_attention_layernorm.weight])[symbol:model.layers.3.post_attention_layernorm.weight] + tensor.CPU.register () -> (%7181:tensor<[6144, 2048], Float32, CPU>[@model.layers.3.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=195), symbol:model.layers.3.mlp.gate_proj.weight])[symbol:model.layers.3.mlp.gate_proj.weight] + tensor.CPU.register () -> (%2714:tensor<[6144, 2048], Float32, CPU>[@model.layers.3.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=198), symbol:model.layers.3.mlp.up_proj.weight])[symbol:model.layers.3.mlp.up_proj.weight] + tensor.CPU.register () -> (%4573:tensor<[2048, 6144], Float32, CPU>[@model.layers.3.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=200), symbol:model.layers.3.mlp.down_proj.weight])[symbol:model.layers.3.mlp.down_proj.weight] + tensor.CPU.register () -> (%5536:tensor<[2048], Float32, CPU>[@model.layers.4.input_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=203), symbol:model.layers.4.input_layernorm.weight])[symbol:model.layers.4.input_layernorm.weight] + tensor.CPU.register () -> (%463:tensor<[2048, 2048], Float32, CPU>[@model.layers.4.self_attn.q_proj.weight][symbol:model.layers.4.self_attn.q_proj.weight])[symbol:model.layers.4.self_attn.q_proj.weight] + tensor.CPU.register () -> (%5989:tensor<[1024, 2048], Float32, 
CPU>[@model.layers.4.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=204), symbol:model.layers.4.self_attn.k_proj.weight])[symbol:model.layers.4.self_attn.k_proj.weight] + tensor.CPU.register () -> (%3443:tensor<[1024, 2048], Float32, CPU>[@model.layers.4.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=206), symbol:model.layers.4.self_attn.v_proj.weight])[symbol:model.layers.4.self_attn.v_proj.weight] + tensor.CPU.register () -> (%926:tensor<[128], Float32, CPU>[@model.layers.4.self_attn.q_norm.weight][quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=210), symbol:model.layers.4.self_attn.q_norm.weight])[symbol:model.layers.4.self_attn.q_norm.weight] + tensor.CPU.register () -> (%5648:tensor<[128], Float32, CPU>[@model.layers.4.self_attn.k_norm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=212), symbol:model.layers.4.self_attn.k_norm.weight])[symbol:model.layers.4.self_attn.k_norm.weight] + tensor.CPU.register () -> (%256:tensor<[2048, 2048], Float32, CPU>[@model.layers.4.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=225), symbol:model.layers.4.self_attn.o_proj.weight])[symbol:model.layers.4.self_attn.o_proj.weight] + tensor.CPU.register () -> (%3101:tensor<[2048], Float32, CPU>[@model.layers.4.post_attention_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=228), symbol:model.layers.4.post_attention_layernorm.weight])[symbol:model.layers.4.post_attention_layernorm.weight] 
+ tensor.CPU.register () -> (%15:tensor<[6144, 2048], Float32, CPU>[@model.layers.4.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=229), symbol:model.layers.4.mlp.gate_proj.weight])[symbol:model.layers.4.mlp.gate_proj.weight] + tensor.CPU.register () -> (%3494:tensor<[6144, 2048], Float32, CPU>[@model.layers.4.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=232), symbol:model.layers.4.mlp.up_proj.weight])[symbol:model.layers.4.mlp.up_proj.weight] + tensor.CPU.register () -> (%6518:tensor<[2048, 6144], Float32, CPU>[@model.layers.4.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=234), symbol:model.layers.4.mlp.down_proj.weight])[symbol:model.layers.4.mlp.down_proj.weight] + tensor.CPU.register () -> (%7246:tensor<[2048], Float32, CPU>[@model.layers.5.input_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=237), symbol:model.layers.5.input_layernorm.weight])[symbol:model.layers.5.input_layernorm.weight] + tensor.CPU.register () -> (%3752:tensor<[2048, 2048], Float32, CPU>[@model.layers.5.self_attn.q_proj.weight][symbol:model.layers.5.self_attn.q_proj.weight])[symbol:model.layers.5.self_attn.q_proj.weight] + tensor.CPU.register () -> (%2143:tensor<[1024, 2048], Float32, CPU>[@model.layers.5.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=238), symbol:model.layers.5.self_attn.k_proj.weight])[symbol:model.layers.5.self_attn.k_proj.weight] 
+ tensor.CPU.register () -> (%5753:tensor<[1024, 2048], Float32, CPU>[@model.layers.5.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=240), symbol:model.layers.5.self_attn.v_proj.weight])[symbol:model.layers.5.self_attn.v_proj.weight] + tensor.CPU.register () -> (%4774:tensor<[128], Float32, CPU>[@model.layers.5.self_attn.q_norm.weight][quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=244), symbol:model.layers.5.self_attn.q_norm.weight])[symbol:model.layers.5.self_attn.q_norm.weight] + tensor.CPU.register () -> (%1215:tensor<[128], Float32, CPU>[@model.layers.5.self_attn.k_norm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=246), symbol:model.layers.5.self_attn.k_norm.weight])[symbol:model.layers.5.self_attn.k_norm.weight] + tensor.CPU.register () -> (%2076:tensor<[2048, 2048], Float32, CPU>[@model.layers.5.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=259), symbol:model.layers.5.self_attn.o_proj.weight])[symbol:model.layers.5.self_attn.o_proj.weight] + tensor.CPU.register () -> (%6883:tensor<[2048], Float32, CPU>[@model.layers.5.post_attention_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=262), symbol:model.layers.5.post_attention_layernorm.weight])[symbol:model.layers.5.post_attention_layernorm.weight] + tensor.CPU.register () -> (%5485:tensor<[6144, 2048], Float32, CPU>[@model.layers.5.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=263), 
symbol:model.layers.5.mlp.gate_proj.weight])[symbol:model.layers.5.mlp.gate_proj.weight] + tensor.CPU.register () -> (%759:tensor<[6144, 2048], Float32, CPU>[@model.layers.5.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=266), symbol:model.layers.5.mlp.up_proj.weight])[symbol:model.layers.5.mlp.up_proj.weight] + tensor.CPU.register () -> (%6315:tensor<[2048, 6144], Float32, CPU>[@model.layers.5.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=268), symbol:model.layers.5.mlp.down_proj.weight])[symbol:model.layers.5.mlp.down_proj.weight] + tensor.CPU.register () -> (%7090:tensor<[2048], Float32, CPU>[@model.layers.6.input_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=271), symbol:model.layers.6.input_layernorm.weight])[symbol:model.layers.6.input_layernorm.weight] + tensor.CPU.register () -> (%3125:tensor<[2048, 2048], Float32, CPU>[@model.layers.6.self_attn.q_proj.weight][symbol:model.layers.6.self_attn.q_proj.weight])[symbol:model.layers.6.self_attn.q_proj.weight] + tensor.CPU.register () -> (%1798:tensor<[1024, 2048], Float32, CPU>[@model.layers.6.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=272), symbol:model.layers.6.self_attn.k_proj.weight])[symbol:model.layers.6.self_attn.k_proj.weight] + tensor.CPU.register () -> (%1047:tensor<[1024, 2048], Float32, CPU>[@model.layers.6.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), 
uuid=274), symbol:model.layers.6.self_attn.v_proj.weight])[symbol:model.layers.6.self_attn.v_proj.weight] + tensor.CPU.register () -> (%7385:tensor<[128], Float32, CPU>[@model.layers.6.self_attn.q_norm.weight][quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=278), symbol:model.layers.6.self_attn.q_norm.weight])[symbol:model.layers.6.self_attn.q_norm.weight] + tensor.CPU.register () -> (%5603:tensor<[128], Float32, CPU>[@model.layers.6.self_attn.k_norm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=280), symbol:model.layers.6.self_attn.k_norm.weight])[symbol:model.layers.6.self_attn.k_norm.weight] + tensor.CPU.register () -> (%6862:tensor<[2048, 2048], Float32, CPU>[@model.layers.6.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=293), symbol:model.layers.6.self_attn.o_proj.weight])[symbol:model.layers.6.self_attn.o_proj.weight] + tensor.CPU.register () -> (%4161:tensor<[2048], Float32, CPU>[@model.layers.6.post_attention_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=296), symbol:model.layers.6.post_attention_layernorm.weight])[symbol:model.layers.6.post_attention_layernorm.weight] + tensor.CPU.register () -> (%5295:tensor<[6144, 2048], Float32, CPU>[@model.layers.6.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=297), symbol:model.layers.6.mlp.gate_proj.weight])[symbol:model.layers.6.mlp.gate_proj.weight] + tensor.CPU.register () -> (%4710:tensor<[6144, 2048], Float32, CPU>[@model.layers.6.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, 
scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=300), symbol:model.layers.6.mlp.up_proj.weight])[symbol:model.layers.6.mlp.up_proj.weight] + tensor.CPU.register () -> (%4929:tensor<[2048, 6144], Float32, CPU>[@model.layers.6.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=302), symbol:model.layers.6.mlp.down_proj.weight])[symbol:model.layers.6.mlp.down_proj.weight] + tensor.CPU.register () -> (%4605:tensor<[2048], Float32, CPU>[@model.layers.7.input_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=305), symbol:model.layers.7.input_layernorm.weight])[symbol:model.layers.7.input_layernorm.weight] + tensor.CPU.register () -> (%4585:tensor<[2048, 2048], Float32, CPU>[@model.layers.7.self_attn.q_proj.weight][symbol:model.layers.7.self_attn.q_proj.weight])[symbol:model.layers.7.self_attn.q_proj.weight] + tensor.CPU.register () -> (%1:tensor<[1024, 2048], Float32, CPU>[@model.layers.7.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=306), symbol:model.layers.7.self_attn.k_proj.weight])[symbol:model.layers.7.self_attn.k_proj.weight] + tensor.CPU.register () -> (%2341:tensor<[1024, 2048], Float32, CPU>[@model.layers.7.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=308), symbol:model.layers.7.self_attn.v_proj.weight])[symbol:model.layers.7.self_attn.v_proj.weight] + tensor.CPU.register () -> (%5151:tensor<[128], Float32, CPU>[@model.layers.7.self_attn.q_norm.weight][quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=312), 
symbol:model.layers.7.self_attn.q_norm.weight])[symbol:model.layers.7.self_attn.q_norm.weight] + tensor.CPU.register () -> (%3437:tensor<[128], Float32, CPU>[@model.layers.7.self_attn.k_norm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=314), symbol:model.layers.7.self_attn.k_norm.weight])[symbol:model.layers.7.self_attn.k_norm.weight] + tensor.CPU.register () -> (%3368:tensor<[2048, 2048], Float32, CPU>[@model.layers.7.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=327), symbol:model.layers.7.self_attn.o_proj.weight])[symbol:model.layers.7.self_attn.o_proj.weight] + tensor.CPU.register () -> (%68:tensor<[2048], Float32, CPU>[@model.layers.7.post_attention_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=330), symbol:model.layers.7.post_attention_layernorm.weight])[symbol:model.layers.7.post_attention_layernorm.weight] + tensor.CPU.register () -> (%324:tensor<[6144, 2048], Float32, CPU>[@model.layers.7.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=331), symbol:model.layers.7.mlp.gate_proj.weight])[symbol:model.layers.7.mlp.gate_proj.weight] + tensor.CPU.register () -> (%5551:tensor<[6144, 2048], Float32, CPU>[@model.layers.7.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=334), symbol:model.layers.7.mlp.up_proj.weight])[symbol:model.layers.7.mlp.up_proj.weight] + tensor.CPU.register () -> (%7894:tensor<[2048, 6144], Float32, 
CPU>[@model.layers.7.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=336), symbol:model.layers.7.mlp.down_proj.weight])[symbol:model.layers.7.mlp.down_proj.weight] + tensor.CPU.register () -> (%3851:tensor<[2048], Float32, CPU>[@model.layers.8.input_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=339), symbol:model.layers.8.input_layernorm.weight])[symbol:model.layers.8.input_layernorm.weight] + tensor.CPU.register () -> (%5874:tensor<[2048, 2048], Float32, CPU>[@model.layers.8.self_attn.q_proj.weight][symbol:model.layers.8.self_attn.q_proj.weight])[symbol:model.layers.8.self_attn.q_proj.weight] + tensor.CPU.register () -> (%1863:tensor<[1024, 2048], Float32, CPU>[@model.layers.8.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=340), symbol:model.layers.8.self_attn.k_proj.weight])[symbol:model.layers.8.self_attn.k_proj.weight] + tensor.CPU.register () -> (%3204:tensor<[1024, 2048], Float32, CPU>[@model.layers.8.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=342), symbol:model.layers.8.self_attn.v_proj.weight])[symbol:model.layers.8.self_attn.v_proj.weight] + tensor.CPU.register () -> (%2301:tensor<[128], Float32, CPU>[@model.layers.8.self_attn.q_norm.weight][quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=346), symbol:model.layers.8.self_attn.q_norm.weight])[symbol:model.layers.8.self_attn.q_norm.weight] + tensor.CPU.register () -> (%7373:tensor<[128], Float32, CPU>[@model.layers.8.self_attn.k_norm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: 
-32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=348), symbol:model.layers.8.self_attn.k_norm.weight])[symbol:model.layers.8.self_attn.k_norm.weight] + tensor.CPU.register () -> (%6303:tensor<[2048, 2048], Float32, CPU>[@model.layers.8.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=361), symbol:model.layers.8.self_attn.o_proj.weight])[symbol:model.layers.8.self_attn.o_proj.weight] + tensor.CPU.register () -> (%1997:tensor<[2048], Float32, CPU>[@model.layers.8.post_attention_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=364), symbol:model.layers.8.post_attention_layernorm.weight])[symbol:model.layers.8.post_attention_layernorm.weight] + tensor.CPU.register () -> (%6731:tensor<[6144, 2048], Float32, CPU>[@model.layers.8.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=365), symbol:model.layers.8.mlp.gate_proj.weight])[symbol:model.layers.8.mlp.gate_proj.weight] + tensor.CPU.register () -> (%5478:tensor<[6144, 2048], Float32, CPU>[@model.layers.8.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=368), symbol:model.layers.8.mlp.up_proj.weight])[symbol:model.layers.8.mlp.up_proj.weight] + tensor.CPU.register () -> (%4734:tensor<[2048, 6144], Float32, CPU>[@model.layers.8.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=370), symbol:model.layers.8.mlp.down_proj.weight])[symbol:model.layers.8.mlp.down_proj.weight] 
+ tensor.CPU.register () -> (%4963:tensor<[2048], Float32, CPU>[@model.layers.9.input_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=373), symbol:model.layers.9.input_layernorm.weight])[symbol:model.layers.9.input_layernorm.weight] + tensor.CPU.register () -> (%137:tensor<[2048, 2048], Float32, CPU>[@model.layers.9.self_attn.q_proj.weight][symbol:model.layers.9.self_attn.q_proj.weight])[symbol:model.layers.9.self_attn.q_proj.weight] + tensor.CPU.register () -> (%2689:tensor<[1024, 2048], Float32, CPU>[@model.layers.9.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=374), symbol:model.layers.9.self_attn.k_proj.weight])[symbol:model.layers.9.self_attn.k_proj.weight] + tensor.CPU.register () -> (%4027:tensor<[1024, 2048], Float32, CPU>[@model.layers.9.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=376), symbol:model.layers.9.self_attn.v_proj.weight])[symbol:model.layers.9.self_attn.v_proj.weight] + tensor.CPU.register () -> (%1375:tensor<[128], Float32, CPU>[@model.layers.9.self_attn.q_norm.weight][quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=380), symbol:model.layers.9.self_attn.q_norm.weight])[symbol:model.layers.9.self_attn.q_norm.weight] + tensor.CPU.register () -> (%4962:tensor<[128], Float32, CPU>[@model.layers.9.self_attn.k_norm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=382), symbol:model.layers.9.self_attn.k_norm.weight])[symbol:model.layers.9.self_attn.k_norm.weight] + tensor.CPU.register () -> (%6399:tensor<[2048, 2048], Float32, 
CPU>[@model.layers.9.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=395), symbol:model.layers.9.self_attn.o_proj.weight])[symbol:model.layers.9.self_attn.o_proj.weight] + tensor.CPU.register () -> (%2594:tensor<[2048], Float32, CPU>[@model.layers.9.post_attention_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=398), symbol:model.layers.9.post_attention_layernorm.weight])[symbol:model.layers.9.post_attention_layernorm.weight] + tensor.CPU.register () -> (%3833:tensor<[6144, 2048], Float32, CPU>[@model.layers.9.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=399), symbol:model.layers.9.mlp.gate_proj.weight])[symbol:model.layers.9.mlp.gate_proj.weight] + tensor.CPU.register () -> (%2358:tensor<[6144, 2048], Float32, CPU>[@model.layers.9.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=402), symbol:model.layers.9.mlp.up_proj.weight])[symbol:model.layers.9.mlp.up_proj.weight] + tensor.CPU.register () -> (%3947:tensor<[2048, 6144], Float32, CPU>[@model.layers.9.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=404), symbol:model.layers.9.mlp.down_proj.weight])[symbol:model.layers.9.mlp.down_proj.weight] + tensor.CPU.register () -> (%3229:tensor<[2048], Float32, CPU>[@model.layers.10.input_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=407), 
symbol:model.layers.10.input_layernorm.weight])[symbol:model.layers.10.input_layernorm.weight] + tensor.CPU.register () -> (%5022:tensor<[2048, 2048], Float32, CPU>[@model.layers.10.self_attn.q_proj.weight][symbol:model.layers.10.self_attn.q_proj.weight])[symbol:model.layers.10.self_attn.q_proj.weight] + tensor.CPU.register () -> (%2867:tensor<[1024, 2048], Float32, CPU>[@model.layers.10.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=408), symbol:model.layers.10.self_attn.k_proj.weight])[symbol:model.layers.10.self_attn.k_proj.weight] + tensor.CPU.register () -> (%567:tensor<[1024, 2048], Float32, CPU>[@model.layers.10.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=410), symbol:model.layers.10.self_attn.v_proj.weight])[symbol:model.layers.10.self_attn.v_proj.weight] + tensor.CPU.register () -> (%7008:tensor<[128], Float32, CPU>[@model.layers.10.self_attn.q_norm.weight][quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=414), symbol:model.layers.10.self_attn.q_norm.weight])[symbol:model.layers.10.self_attn.q_norm.weight] + tensor.CPU.register () -> (%6953:tensor<[128], Float32, CPU>[@model.layers.10.self_attn.k_norm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=416), symbol:model.layers.10.self_attn.k_norm.weight])[symbol:model.layers.10.self_attn.k_norm.weight] + tensor.CPU.register () -> (%5479:tensor<[2048, 2048], Float32, CPU>[@model.layers.10.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=429), 
symbol:model.layers.10.self_attn.o_proj.weight])[symbol:model.layers.10.self_attn.o_proj.weight] + tensor.CPU.register () -> (%3177:tensor<[2048], Float32, CPU>[@model.layers.10.post_attention_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=432), symbol:model.layers.10.post_attention_layernorm.weight])[symbol:model.layers.10.post_attention_layernorm.weight] + tensor.CPU.register () -> (%7857:tensor<[6144, 2048], Float32, CPU>[@model.layers.10.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=433), symbol:model.layers.10.mlp.gate_proj.weight])[symbol:model.layers.10.mlp.gate_proj.weight] + tensor.CPU.register () -> (%3620:tensor<[6144, 2048], Float32, CPU>[@model.layers.10.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=436), symbol:model.layers.10.mlp.up_proj.weight])[symbol:model.layers.10.mlp.up_proj.weight] + tensor.CPU.register () -> (%4172:tensor<[2048, 6144], Float32, CPU>[@model.layers.10.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=438), symbol:model.layers.10.mlp.down_proj.weight])[symbol:model.layers.10.mlp.down_proj.weight] + tensor.CPU.register () -> (%1820:tensor<[2048], Float32, CPU>[@model.layers.11.input_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=441), symbol:model.layers.11.input_layernorm.weight])[symbol:model.layers.11.input_layernorm.weight] + tensor.CPU.register () -> (%4375:tensor<[2048, 2048], Float32, 
CPU>[@model.layers.11.self_attn.q_proj.weight][symbol:model.layers.11.self_attn.q_proj.weight])[symbol:model.layers.11.self_attn.q_proj.weight] + tensor.CPU.register () -> (%3805:tensor<[1024, 2048], Float32, CPU>[@model.layers.11.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=442), symbol:model.layers.11.self_attn.k_proj.weight])[symbol:model.layers.11.self_attn.k_proj.weight] + tensor.CPU.register () -> (%5348:tensor<[1024, 2048], Float32, CPU>[@model.layers.11.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=444), symbol:model.layers.11.self_attn.v_proj.weight])[symbol:model.layers.11.self_attn.v_proj.weight] + tensor.CPU.register () -> (%1018:tensor<[128], Float32, CPU>[@model.layers.11.self_attn.q_norm.weight][quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=448), symbol:model.layers.11.self_attn.q_norm.weight])[symbol:model.layers.11.self_attn.q_norm.weight] + tensor.CPU.register () -> (%5323:tensor<[128], Float32, CPU>[@model.layers.11.self_attn.k_norm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=450), symbol:model.layers.11.self_attn.k_norm.weight])[symbol:model.layers.11.self_attn.k_norm.weight] + tensor.CPU.register () -> (%6587:tensor<[2048, 2048], Float32, CPU>[@model.layers.11.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=463), symbol:model.layers.11.self_attn.o_proj.weight])[symbol:model.layers.11.self_attn.o_proj.weight] + tensor.CPU.register () -> (%2072:tensor<[2048], Float32, 
CPU>[@model.layers.11.post_attention_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=466), symbol:model.layers.11.post_attention_layernorm.weight])[symbol:model.layers.11.post_attention_layernorm.weight] + tensor.CPU.register () -> (%5180:tensor<[6144, 2048], Float32, CPU>[@model.layers.11.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=467), symbol:model.layers.11.mlp.gate_proj.weight])[symbol:model.layers.11.mlp.gate_proj.weight] + tensor.CPU.register () -> (%1917:tensor<[6144, 2048], Float32, CPU>[@model.layers.11.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=470), symbol:model.layers.11.mlp.up_proj.weight])[symbol:model.layers.11.mlp.up_proj.weight] + tensor.CPU.register () -> (%2810:tensor<[2048, 6144], Float32, CPU>[@model.layers.11.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=472), symbol:model.layers.11.mlp.down_proj.weight])[symbol:model.layers.11.mlp.down_proj.weight] + tensor.CPU.register () -> (%4945:tensor<[2048], Float32, CPU>[@model.layers.12.input_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=475), symbol:model.layers.12.input_layernorm.weight])[symbol:model.layers.12.input_layernorm.weight] + tensor.CPU.register () -> (%6926:tensor<[2048, 2048], Float32, CPU>[@model.layers.12.self_attn.q_proj.weight][symbol:model.layers.12.self_attn.q_proj.weight])[symbol:model.layers.12.self_attn.q_proj.weight] + tensor.CPU.register () -> (%2741:tensor<[1024, 2048], 
Float32, CPU>[@model.layers.12.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=476), symbol:model.layers.12.self_attn.k_proj.weight])[symbol:model.layers.12.self_attn.k_proj.weight] + tensor.CPU.register () -> (%3690:tensor<[1024, 2048], Float32, CPU>[@model.layers.12.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=478), symbol:model.layers.12.self_attn.v_proj.weight])[symbol:model.layers.12.self_attn.v_proj.weight] + tensor.CPU.register () -> (%5447:tensor<[128], Float32, CPU>[@model.layers.12.self_attn.q_norm.weight][quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=482), symbol:model.layers.12.self_attn.q_norm.weight])[symbol:model.layers.12.self_attn.q_norm.weight] + tensor.CPU.register () -> (%5437:tensor<[128], Float32, CPU>[@model.layers.12.self_attn.k_norm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=484), symbol:model.layers.12.self_attn.k_norm.weight])[symbol:model.layers.12.self_attn.k_norm.weight] + tensor.CPU.register () -> (%4785:tensor<[2048, 2048], Float32, CPU>[@model.layers.12.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=497), symbol:model.layers.12.self_attn.o_proj.weight])[symbol:model.layers.12.self_attn.o_proj.weight] + tensor.CPU.register () -> (%1343:tensor<[2048], Float32, CPU>[@model.layers.12.post_attention_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=500), 
symbol:model.layers.12.post_attention_layernorm.weight])[symbol:model.layers.12.post_attention_layernorm.weight] + tensor.CPU.register () -> (%3306:tensor<[6144, 2048], Float32, CPU>[@model.layers.12.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=501), symbol:model.layers.12.mlp.gate_proj.weight])[symbol:model.layers.12.mlp.gate_proj.weight] + tensor.CPU.register () -> (%2123:tensor<[6144, 2048], Float32, CPU>[@model.layers.12.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=504), symbol:model.layers.12.mlp.up_proj.weight])[symbol:model.layers.12.mlp.up_proj.weight] + tensor.CPU.register () -> (%2005:tensor<[2048, 6144], Float32, CPU>[@model.layers.12.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=506), symbol:model.layers.12.mlp.down_proj.weight])[symbol:model.layers.12.mlp.down_proj.weight] + tensor.CPU.register () -> (%1812:tensor<[2048], Float32, CPU>[@model.layers.13.input_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=509), symbol:model.layers.13.input_layernorm.weight])[symbol:model.layers.13.input_layernorm.weight] + tensor.CPU.register () -> (%7043:tensor<[2048, 2048], Float32, CPU>[@model.layers.13.self_attn.q_proj.weight][symbol:model.layers.13.self_attn.q_proj.weight])[symbol:model.layers.13.self_attn.q_proj.weight] + tensor.CPU.register () -> (%229:tensor<[1024, 2048], Float32, CPU>[@model.layers.13.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, 
scale_1_type: Float32), uuid=510), symbol:model.layers.13.self_attn.k_proj.weight])[symbol:model.layers.13.self_attn.k_proj.weight] + tensor.CPU.register () -> (%1019:tensor<[1024, 2048], Float32, CPU>[@model.layers.13.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=512), symbol:model.layers.13.self_attn.v_proj.weight])[symbol:model.layers.13.self_attn.v_proj.weight] + tensor.CPU.register () -> (%3318:tensor<[128], Float32, CPU>[@model.layers.13.self_attn.q_norm.weight][quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=516), symbol:model.layers.13.self_attn.q_norm.weight])[symbol:model.layers.13.self_attn.q_norm.weight] + tensor.CPU.register () -> (%2503:tensor<[128], Float32, CPU>[@model.layers.13.self_attn.k_norm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=518), symbol:model.layers.13.self_attn.k_norm.weight])[symbol:model.layers.13.self_attn.k_norm.weight] + tensor.CPU.register () -> (%3883:tensor<[2048, 2048], Float32, CPU>[@model.layers.13.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=531), symbol:model.layers.13.self_attn.o_proj.weight])[symbol:model.layers.13.self_attn.o_proj.weight] + tensor.CPU.register () -> (%6904:tensor<[2048], Float32, CPU>[@model.layers.13.post_attention_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=534), symbol:model.layers.13.post_attention_layernorm.weight])[symbol:model.layers.13.post_attention_layernorm.weight] + tensor.CPU.register () -> (%5444:tensor<[6144, 2048], Float32, CPU>[@model.layers.13.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, 
quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=535), symbol:model.layers.13.mlp.gate_proj.weight])[symbol:model.layers.13.mlp.gate_proj.weight] + tensor.CPU.register () -> (%3100:tensor<[6144, 2048], Float32, CPU>[@model.layers.13.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=538), symbol:model.layers.13.mlp.up_proj.weight])[symbol:model.layers.13.mlp.up_proj.weight] + tensor.CPU.register () -> (%6631:tensor<[2048, 6144], Float32, CPU>[@model.layers.13.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=540), symbol:model.layers.13.mlp.down_proj.weight])[symbol:model.layers.13.mlp.down_proj.weight] + tensor.CPU.register () -> (%5555:tensor<[2048], Float32, CPU>[@model.layers.14.input_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=543), symbol:model.layers.14.input_layernorm.weight])[symbol:model.layers.14.input_layernorm.weight] + tensor.CPU.register () -> (%1210:tensor<[2048, 2048], Float32, CPU>[@model.layers.14.self_attn.q_proj.weight][symbol:model.layers.14.self_attn.q_proj.weight])[symbol:model.layers.14.self_attn.q_proj.weight] + tensor.CPU.register () -> (%3756:tensor<[1024, 2048], Float32, CPU>[@model.layers.14.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=544), symbol:model.layers.14.self_attn.k_proj.weight])[symbol:model.layers.14.self_attn.k_proj.weight] + tensor.CPU.register () -> (%5243:tensor<[1024, 2048], Float32, 
CPU>[@model.layers.14.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=546), symbol:model.layers.14.self_attn.v_proj.weight])[symbol:model.layers.14.self_attn.v_proj.weight] + tensor.CPU.register () -> (%3796:tensor<[128], Float32, CPU>[@model.layers.14.self_attn.q_norm.weight][quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=550), symbol:model.layers.14.self_attn.q_norm.weight])[symbol:model.layers.14.self_attn.q_norm.weight] + tensor.CPU.register () -> (%3974:tensor<[128], Float32, CPU>[@model.layers.14.self_attn.k_norm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=552), symbol:model.layers.14.self_attn.k_norm.weight])[symbol:model.layers.14.self_attn.k_norm.weight] + tensor.CPU.register () -> (%3797:tensor<[2048, 2048], Float32, CPU>[@model.layers.14.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=565), symbol:model.layers.14.self_attn.o_proj.weight])[symbol:model.layers.14.self_attn.o_proj.weight] + tensor.CPU.register () -> (%4508:tensor<[2048], Float32, CPU>[@model.layers.14.post_attention_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=568), symbol:model.layers.14.post_attention_layernorm.weight])[symbol:model.layers.14.post_attention_layernorm.weight] + tensor.CPU.register () -> (%7092:tensor<[6144, 2048], Float32, CPU>[@model.layers.14.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=569), 
symbol:model.layers.14.mlp.gate_proj.weight])[symbol:model.layers.14.mlp.gate_proj.weight] + tensor.CPU.register () -> (%7164:tensor<[6144, 2048], Float32, CPU>[@model.layers.14.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=572), symbol:model.layers.14.mlp.up_proj.weight])[symbol:model.layers.14.mlp.up_proj.weight] + tensor.CPU.register () -> (%4419:tensor<[2048, 6144], Float32, CPU>[@model.layers.14.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=574), symbol:model.layers.14.mlp.down_proj.weight])[symbol:model.layers.14.mlp.down_proj.weight] + tensor.CPU.register () -> (%5590:tensor<[2048], Float32, CPU>[@model.layers.15.input_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=577), symbol:model.layers.15.input_layernorm.weight])[symbol:model.layers.15.input_layernorm.weight] + tensor.CPU.register () -> (%5843:tensor<[2048, 2048], Float32, CPU>[@model.layers.15.self_attn.q_proj.weight][symbol:model.layers.15.self_attn.q_proj.weight])[symbol:model.layers.15.self_attn.q_proj.weight] + tensor.CPU.register () -> (%938:tensor<[1024, 2048], Float32, CPU>[@model.layers.15.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=578), symbol:model.layers.15.self_attn.k_proj.weight])[symbol:model.layers.15.self_attn.k_proj.weight] + tensor.CPU.register () -> (%3967:tensor<[1024, 2048], Float32, CPU>[@model.layers.15.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, 
scale_1_type: Float32), uuid=580), symbol:model.layers.15.self_attn.v_proj.weight])[symbol:model.layers.15.self_attn.v_proj.weight] + tensor.CPU.register () -> (%3289:tensor<[128], Float32, CPU>[@model.layers.15.self_attn.q_norm.weight][quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=584), symbol:model.layers.15.self_attn.q_norm.weight])[symbol:model.layers.15.self_attn.q_norm.weight] + tensor.CPU.register () -> (%6756:tensor<[128], Float32, CPU>[@model.layers.15.self_attn.k_norm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=586), symbol:model.layers.15.self_attn.k_norm.weight])[symbol:model.layers.15.self_attn.k_norm.weight] + tensor.CPU.register () -> (%4838:tensor<[2048, 2048], Float32, CPU>[@model.layers.15.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=599), symbol:model.layers.15.self_attn.o_proj.weight])[symbol:model.layers.15.self_attn.o_proj.weight] + tensor.CPU.register () -> (%6774:tensor<[2048], Float32, CPU>[@model.layers.15.post_attention_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=602), symbol:model.layers.15.post_attention_layernorm.weight])[symbol:model.layers.15.post_attention_layernorm.weight] + tensor.CPU.register () -> (%2819:tensor<[6144, 2048], Float32, CPU>[@model.layers.15.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=603), symbol:model.layers.15.mlp.gate_proj.weight])[symbol:model.layers.15.mlp.gate_proj.weight] + tensor.CPU.register () -> (%1377:tensor<[6144, 2048], Float32, CPU>[@model.layers.15.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, 
block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=606), symbol:model.layers.15.mlp.up_proj.weight])[symbol:model.layers.15.mlp.up_proj.weight] + tensor.CPU.register () -> (%526:tensor<[2048, 6144], Float32, CPU>[@model.layers.15.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=608), symbol:model.layers.15.mlp.down_proj.weight])[symbol:model.layers.15.mlp.down_proj.weight] + tensor.CPU.register () -> (%369:tensor<[2048], Float32, CPU>[@model.layers.16.input_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=611), symbol:model.layers.16.input_layernorm.weight])[symbol:model.layers.16.input_layernorm.weight] + tensor.CPU.register () -> (%2345:tensor<[2048, 2048], Float32, CPU>[@model.layers.16.self_attn.q_proj.weight][symbol:model.layers.16.self_attn.q_proj.weight])[symbol:model.layers.16.self_attn.q_proj.weight] + tensor.CPU.register () -> (%3022:tensor<[1024, 2048], Float32, CPU>[@model.layers.16.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=612), symbol:model.layers.16.self_attn.k_proj.weight])[symbol:model.layers.16.self_attn.k_proj.weight] + tensor.CPU.register () -> (%2931:tensor<[1024, 2048], Float32, CPU>[@model.layers.16.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=614), symbol:model.layers.16.self_attn.v_proj.weight])[symbol:model.layers.16.self_attn.v_proj.weight] + tensor.CPU.register () -> (%1150:tensor<[128], Float32, 
CPU>[@model.layers.16.self_attn.q_norm.weight][quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=618), symbol:model.layers.16.self_attn.q_norm.weight])[symbol:model.layers.16.self_attn.q_norm.weight] + tensor.CPU.register () -> (%5521:tensor<[128], Float32, CPU>[@model.layers.16.self_attn.k_norm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=620), symbol:model.layers.16.self_attn.k_norm.weight])[symbol:model.layers.16.self_attn.k_norm.weight] + tensor.CPU.register () -> (%672:tensor<[2048, 2048], Float32, CPU>[@model.layers.16.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=633), symbol:model.layers.16.self_attn.o_proj.weight])[symbol:model.layers.16.self_attn.o_proj.weight] + tensor.CPU.register () -> (%6793:tensor<[2048], Float32, CPU>[@model.layers.16.post_attention_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=636), symbol:model.layers.16.post_attention_layernorm.weight])[symbol:model.layers.16.post_attention_layernorm.weight] + tensor.CPU.register () -> (%993:tensor<[6144, 2048], Float32, CPU>[@model.layers.16.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=637), symbol:model.layers.16.mlp.gate_proj.weight])[symbol:model.layers.16.mlp.gate_proj.weight] + tensor.CPU.register () -> (%226:tensor<[6144, 2048], Float32, CPU>[@model.layers.16.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=640), symbol:model.layers.16.mlp.up_proj.weight])[symbol:model.layers.16.mlp.up_proj.weight] 
+ tensor.CPU.register () -> (%7287:tensor<[2048, 6144], Float32, CPU>[@model.layers.16.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=642), symbol:model.layers.16.mlp.down_proj.weight])[symbol:model.layers.16.mlp.down_proj.weight] + tensor.CPU.register () -> (%7811:tensor<[2048], Float32, CPU>[@model.layers.17.input_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=645), symbol:model.layers.17.input_layernorm.weight])[symbol:model.layers.17.input_layernorm.weight] + tensor.CPU.register () -> (%5758:tensor<[2048, 2048], Float32, CPU>[@model.layers.17.self_attn.q_proj.weight][symbol:model.layers.17.self_attn.q_proj.weight])[symbol:model.layers.17.self_attn.q_proj.weight] + tensor.CPU.register () -> (%2828:tensor<[1024, 2048], Float32, CPU>[@model.layers.17.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=646), symbol:model.layers.17.self_attn.k_proj.weight])[symbol:model.layers.17.self_attn.k_proj.weight] + tensor.CPU.register () -> (%417:tensor<[1024, 2048], Float32, CPU>[@model.layers.17.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=648), symbol:model.layers.17.self_attn.v_proj.weight])[symbol:model.layers.17.self_attn.v_proj.weight] + tensor.CPU.register () -> (%59:tensor<[128], Float32, CPU>[@model.layers.17.self_attn.q_norm.weight][quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=652), symbol:model.layers.17.self_attn.q_norm.weight])[symbol:model.layers.17.self_attn.q_norm.weight] + tensor.CPU.register () -> (%7588:tensor<[128], Float32, 
CPU>[@model.layers.17.self_attn.k_norm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=654), symbol:model.layers.17.self_attn.k_norm.weight])[symbol:model.layers.17.self_attn.k_norm.weight] + tensor.CPU.register () -> (%5285:tensor<[2048, 2048], Float32, CPU>[@model.layers.17.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=667), symbol:model.layers.17.self_attn.o_proj.weight])[symbol:model.layers.17.self_attn.o_proj.weight] + tensor.CPU.register () -> (%3787:tensor<[2048], Float32, CPU>[@model.layers.17.post_attention_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=670), symbol:model.layers.17.post_attention_layernorm.weight])[symbol:model.layers.17.post_attention_layernorm.weight] + tensor.CPU.register () -> (%4841:tensor<[6144, 2048], Float32, CPU>[@model.layers.17.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=671), symbol:model.layers.17.mlp.gate_proj.weight])[symbol:model.layers.17.mlp.gate_proj.weight] + tensor.CPU.register () -> (%4784:tensor<[6144, 2048], Float32, CPU>[@model.layers.17.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=674), symbol:model.layers.17.mlp.up_proj.weight])[symbol:model.layers.17.mlp.up_proj.weight] + tensor.CPU.register () -> (%1908:tensor<[2048, 6144], Float32, CPU>[@model.layers.17.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: 
Float32), uuid=676), symbol:model.layers.17.mlp.down_proj.weight])[symbol:model.layers.17.mlp.down_proj.weight] + tensor.CPU.register () -> (%310:tensor<[2048], Float32, CPU>[@model.layers.18.input_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=679), symbol:model.layers.18.input_layernorm.weight])[symbol:model.layers.18.input_layernorm.weight] + tensor.CPU.register () -> (%7352:tensor<[2048, 2048], Float32, CPU>[@model.layers.18.self_attn.q_proj.weight][symbol:model.layers.18.self_attn.q_proj.weight])[symbol:model.layers.18.self_attn.q_proj.weight] + tensor.CPU.register () -> (%6436:tensor<[1024, 2048], Float32, CPU>[@model.layers.18.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=680), symbol:model.layers.18.self_attn.k_proj.weight])[symbol:model.layers.18.self_attn.k_proj.weight] + tensor.CPU.register () -> (%6164:tensor<[1024, 2048], Float32, CPU>[@model.layers.18.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=682), symbol:model.layers.18.self_attn.v_proj.weight])[symbol:model.layers.18.self_attn.v_proj.weight] + tensor.CPU.register () -> (%2747:tensor<[128], Float32, CPU>[@model.layers.18.self_attn.q_norm.weight][quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=686), symbol:model.layers.18.self_attn.q_norm.weight])[symbol:model.layers.18.self_attn.q_norm.weight] + tensor.CPU.register () -> (%5281:tensor<[128], Float32, CPU>[@model.layers.18.self_attn.k_norm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=688), symbol:model.layers.18.self_attn.k_norm.weight])[symbol:model.layers.18.self_attn.k_norm.weight] 
+ tensor.CPU.register () -> (%7646:tensor<[2048, 2048], Float32, CPU>[@model.layers.18.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=701), symbol:model.layers.18.self_attn.o_proj.weight])[symbol:model.layers.18.self_attn.o_proj.weight] + tensor.CPU.register () -> (%2540:tensor<[2048], Float32, CPU>[@model.layers.18.post_attention_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=704), symbol:model.layers.18.post_attention_layernorm.weight])[symbol:model.layers.18.post_attention_layernorm.weight] + tensor.CPU.register () -> (%6101:tensor<[6144, 2048], Float32, CPU>[@model.layers.18.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=705), symbol:model.layers.18.mlp.gate_proj.weight])[symbol:model.layers.18.mlp.gate_proj.weight] + tensor.CPU.register () -> (%2195:tensor<[6144, 2048], Float32, CPU>[@model.layers.18.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=708), symbol:model.layers.18.mlp.up_proj.weight])[symbol:model.layers.18.mlp.up_proj.weight] + tensor.CPU.register () -> (%3651:tensor<[2048, 6144], Float32, CPU>[@model.layers.18.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=710), symbol:model.layers.18.mlp.down_proj.weight])[symbol:model.layers.18.mlp.down_proj.weight] + tensor.CPU.register () -> (%3722:tensor<[2048], Float32, CPU>[@model.layers.19.input_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=713), symbol:model.layers.19.input_layernorm.weight])[symbol:model.layers.19.input_layernorm.weight] + tensor.CPU.register () -> (%1141:tensor<[2048, 2048], Float32, CPU>[@model.layers.19.self_attn.q_proj.weight][symbol:model.layers.19.self_attn.q_proj.weight])[symbol:model.layers.19.self_attn.q_proj.weight] + tensor.CPU.register () -> (%651:tensor<[1024, 2048], Float32, CPU>[@model.layers.19.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=714), symbol:model.layers.19.self_attn.k_proj.weight])[symbol:model.layers.19.self_attn.k_proj.weight] + tensor.CPU.register () -> (%254:tensor<[1024, 2048], Float32, CPU>[@model.layers.19.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=716), symbol:model.layers.19.self_attn.v_proj.weight])[symbol:model.layers.19.self_attn.v_proj.weight] + tensor.CPU.register () -> (%610:tensor<[128], Float32, CPU>[@model.layers.19.self_attn.q_norm.weight][quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=720), symbol:model.layers.19.self_attn.q_norm.weight])[symbol:model.layers.19.self_attn.q_norm.weight] + tensor.CPU.register () -> (%3691:tensor<[128], Float32, CPU>[@model.layers.19.self_attn.k_norm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=722), symbol:model.layers.19.self_attn.k_norm.weight])[symbol:model.layers.19.self_attn.k_norm.weight] + tensor.CPU.register () -> (%7002:tensor<[2048, 2048], Float32, CPU>[@model.layers.19.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), 
uuid=735), symbol:model.layers.19.self_attn.o_proj.weight])[symbol:model.layers.19.self_attn.o_proj.weight] + tensor.CPU.register () -> (%3446:tensor<[2048], Float32, CPU>[@model.layers.19.post_attention_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=738), symbol:model.layers.19.post_attention_layernorm.weight])[symbol:model.layers.19.post_attention_layernorm.weight] + tensor.CPU.register () -> (%2118:tensor<[6144, 2048], Float32, CPU>[@model.layers.19.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=739), symbol:model.layers.19.mlp.gate_proj.weight])[symbol:model.layers.19.mlp.gate_proj.weight] + tensor.CPU.register () -> (%283:tensor<[6144, 2048], Float32, CPU>[@model.layers.19.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=742), symbol:model.layers.19.mlp.up_proj.weight])[symbol:model.layers.19.mlp.up_proj.weight] + tensor.CPU.register () -> (%1264:tensor<[2048, 6144], Float32, CPU>[@model.layers.19.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=744), symbol:model.layers.19.mlp.down_proj.weight])[symbol:model.layers.19.mlp.down_proj.weight] + tensor.CPU.register () -> (%5183:tensor<[2048], Float32, CPU>[@model.layers.20.input_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=747), symbol:model.layers.20.input_layernorm.weight])[symbol:model.layers.20.input_layernorm.weight] + tensor.CPU.register () -> (%6004:tensor<[2048, 2048], Float32, 
CPU>[@model.layers.20.self_attn.q_proj.weight][symbol:model.layers.20.self_attn.q_proj.weight])[symbol:model.layers.20.self_attn.q_proj.weight] + tensor.CPU.register () -> (%4764:tensor<[1024, 2048], Float32, CPU>[@model.layers.20.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=748), symbol:model.layers.20.self_attn.k_proj.weight])[symbol:model.layers.20.self_attn.k_proj.weight] + tensor.CPU.register () -> (%3516:tensor<[1024, 2048], Float32, CPU>[@model.layers.20.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=750), symbol:model.layers.20.self_attn.v_proj.weight])[symbol:model.layers.20.self_attn.v_proj.weight] + tensor.CPU.register () -> (%2042:tensor<[128], Float32, CPU>[@model.layers.20.self_attn.q_norm.weight][quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=754), symbol:model.layers.20.self_attn.q_norm.weight])[symbol:model.layers.20.self_attn.q_norm.weight] + tensor.CPU.register () -> (%1646:tensor<[128], Float32, CPU>[@model.layers.20.self_attn.k_norm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=756), symbol:model.layers.20.self_attn.k_norm.weight])[symbol:model.layers.20.self_attn.k_norm.weight] + tensor.CPU.register () -> (%3587:tensor<[2048, 2048], Float32, CPU>[@model.layers.20.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=769), symbol:model.layers.20.self_attn.o_proj.weight])[symbol:model.layers.20.self_attn.o_proj.weight] + tensor.CPU.register () -> (%2726:tensor<[2048], Float32, 
CPU>[@model.layers.20.post_attention_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=772), symbol:model.layers.20.post_attention_layernorm.weight])[symbol:model.layers.20.post_attention_layernorm.weight] + tensor.CPU.register () -> (%3656:tensor<[6144, 2048], Float32, CPU>[@model.layers.20.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=773), symbol:model.layers.20.mlp.gate_proj.weight])[symbol:model.layers.20.mlp.gate_proj.weight] + tensor.CPU.register () -> (%802:tensor<[6144, 2048], Float32, CPU>[@model.layers.20.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=776), symbol:model.layers.20.mlp.up_proj.weight])[symbol:model.layers.20.mlp.up_proj.weight] + tensor.CPU.register () -> (%62:tensor<[2048, 6144], Float32, CPU>[@model.layers.20.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=778), symbol:model.layers.20.mlp.down_proj.weight])[symbol:model.layers.20.mlp.down_proj.weight] + tensor.CPU.register () -> (%1237:tensor<[2048], Float32, CPU>[@model.layers.21.input_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=781), symbol:model.layers.21.input_layernorm.weight])[symbol:model.layers.21.input_layernorm.weight] + tensor.CPU.register () -> (%2397:tensor<[2048, 2048], Float32, CPU>[@model.layers.21.self_attn.q_proj.weight][symbol:model.layers.21.self_attn.q_proj.weight])[symbol:model.layers.21.self_attn.q_proj.weight] + tensor.CPU.register () -> (%7562:tensor<[1024, 2048], Float32, 
CPU>[@model.layers.21.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=782), symbol:model.layers.21.self_attn.k_proj.weight])[symbol:model.layers.21.self_attn.k_proj.weight] + tensor.CPU.register () -> (%4665:tensor<[1024, 2048], Float32, CPU>[@model.layers.21.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=784), symbol:model.layers.21.self_attn.v_proj.weight])[symbol:model.layers.21.self_attn.v_proj.weight] + tensor.CPU.register () -> (%6195:tensor<[128], Float32, CPU>[@model.layers.21.self_attn.q_norm.weight][quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=788), symbol:model.layers.21.self_attn.q_norm.weight])[symbol:model.layers.21.self_attn.q_norm.weight] + tensor.CPU.register () -> (%701:tensor<[128], Float32, CPU>[@model.layers.21.self_attn.k_norm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=790), symbol:model.layers.21.self_attn.k_norm.weight])[symbol:model.layers.21.self_attn.k_norm.weight] + tensor.CPU.register () -> (%5913:tensor<[2048, 2048], Float32, CPU>[@model.layers.21.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=803), symbol:model.layers.21.self_attn.o_proj.weight])[symbol:model.layers.21.self_attn.o_proj.weight] + tensor.CPU.register () -> (%4765:tensor<[2048], Float32, CPU>[@model.layers.21.post_attention_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=806), 
symbol:model.layers.21.post_attention_layernorm.weight])[symbol:model.layers.21.post_attention_layernorm.weight] + tensor.CPU.register () -> (%864:tensor<[6144, 2048], Float32, CPU>[@model.layers.21.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=807), symbol:model.layers.21.mlp.gate_proj.weight])[symbol:model.layers.21.mlp.gate_proj.weight] + tensor.CPU.register () -> (%923:tensor<[6144, 2048], Float32, CPU>[@model.layers.21.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=810), symbol:model.layers.21.mlp.up_proj.weight])[symbol:model.layers.21.mlp.up_proj.weight] + tensor.CPU.register () -> (%6934:tensor<[2048, 6144], Float32, CPU>[@model.layers.21.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=812), symbol:model.layers.21.mlp.down_proj.weight])[symbol:model.layers.21.mlp.down_proj.weight] + tensor.CPU.register () -> (%425:tensor<[2048], Float32, CPU>[@model.layers.22.input_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=815), symbol:model.layers.22.input_layernorm.weight])[symbol:model.layers.22.input_layernorm.weight] + tensor.CPU.register () -> (%1036:tensor<[2048, 2048], Float32, CPU>[@model.layers.22.self_attn.q_proj.weight][symbol:model.layers.22.self_attn.q_proj.weight])[symbol:model.layers.22.self_attn.q_proj.weight] + tensor.CPU.register () -> (%6990:tensor<[1024, 2048], Float32, CPU>[@model.layers.22.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, 
scale_1_type: Float32), uuid=816), symbol:model.layers.22.self_attn.k_proj.weight])[symbol:model.layers.22.self_attn.k_proj.weight] + tensor.CPU.register () -> (%2703:tensor<[1024, 2048], Float32, CPU>[@model.layers.22.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=818), symbol:model.layers.22.self_attn.v_proj.weight])[symbol:model.layers.22.self_attn.v_proj.weight] + tensor.CPU.register () -> (%1995:tensor<[128], Float32, CPU>[@model.layers.22.self_attn.q_norm.weight][quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=822), symbol:model.layers.22.self_attn.q_norm.weight])[symbol:model.layers.22.self_attn.q_norm.weight] + tensor.CPU.register () -> (%2702:tensor<[128], Float32, CPU>[@model.layers.22.self_attn.k_norm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=824), symbol:model.layers.22.self_attn.k_norm.weight])[symbol:model.layers.22.self_attn.k_norm.weight] + tensor.CPU.register () -> (%2221:tensor<[2048, 2048], Float32, CPU>[@model.layers.22.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=837), symbol:model.layers.22.self_attn.o_proj.weight])[symbol:model.layers.22.self_attn.o_proj.weight] + tensor.CPU.register () -> (%5286:tensor<[2048], Float32, CPU>[@model.layers.22.post_attention_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=840), symbol:model.layers.22.post_attention_layernorm.weight])[symbol:model.layers.22.post_attention_layernorm.weight] + tensor.CPU.register () -> (%7377:tensor<[6144, 2048], Float32, CPU>[@model.layers.22.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, 
quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=841), symbol:model.layers.22.mlp.gate_proj.weight])[symbol:model.layers.22.mlp.gate_proj.weight] + tensor.CPU.register () -> (%694:tensor<[6144, 2048], Float32, CPU>[@model.layers.22.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=844), symbol:model.layers.22.mlp.up_proj.weight])[symbol:model.layers.22.mlp.up_proj.weight] + tensor.CPU.register () -> (%1401:tensor<[2048, 6144], Float32, CPU>[@model.layers.22.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=846), symbol:model.layers.22.mlp.down_proj.weight])[symbol:model.layers.22.mlp.down_proj.weight] + tensor.CPU.register () -> (%809:tensor<[2048], Float32, CPU>[@model.layers.23.input_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=849), symbol:model.layers.23.input_layernorm.weight])[symbol:model.layers.23.input_layernorm.weight] + tensor.CPU.register () -> (%2936:tensor<[2048, 2048], Float32, CPU>[@model.layers.23.self_attn.q_proj.weight][symbol:model.layers.23.self_attn.q_proj.weight])[symbol:model.layers.23.self_attn.q_proj.weight] + tensor.CPU.register () -> (%577:tensor<[1024, 2048], Float32, CPU>[@model.layers.23.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=850), symbol:model.layers.23.self_attn.k_proj.weight])[symbol:model.layers.23.self_attn.k_proj.weight] + tensor.CPU.register () -> (%5308:tensor<[1024, 2048], Float32, 
CPU>[@model.layers.23.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=852), symbol:model.layers.23.self_attn.v_proj.weight])[symbol:model.layers.23.self_attn.v_proj.weight] + tensor.CPU.register () -> (%5454:tensor<[128], Float32, CPU>[@model.layers.23.self_attn.q_norm.weight][quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=856), symbol:model.layers.23.self_attn.q_norm.weight])[symbol:model.layers.23.self_attn.q_norm.weight] + tensor.CPU.register () -> (%1089:tensor<[128], Float32, CPU>[@model.layers.23.self_attn.k_norm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=858), symbol:model.layers.23.self_attn.k_norm.weight])[symbol:model.layers.23.self_attn.k_norm.weight] + tensor.CPU.register () -> (%4076:tensor<[2048, 2048], Float32, CPU>[@model.layers.23.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=871), symbol:model.layers.23.self_attn.o_proj.weight])[symbol:model.layers.23.self_attn.o_proj.weight] + tensor.CPU.register () -> (%4535:tensor<[2048], Float32, CPU>[@model.layers.23.post_attention_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=874), symbol:model.layers.23.post_attention_layernorm.weight])[symbol:model.layers.23.post_attention_layernorm.weight] + tensor.CPU.register () -> (%7750:tensor<[6144, 2048], Float32, CPU>[@model.layers.23.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=875), 
symbol:model.layers.23.mlp.gate_proj.weight])[symbol:model.layers.23.mlp.gate_proj.weight] + tensor.CPU.register () -> (%4744:tensor<[6144, 2048], Float32, CPU>[@model.layers.23.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=878), symbol:model.layers.23.mlp.up_proj.weight])[symbol:model.layers.23.mlp.up_proj.weight] + tensor.CPU.register () -> (%2933:tensor<[2048, 6144], Float32, CPU>[@model.layers.23.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=880), symbol:model.layers.23.mlp.down_proj.weight])[symbol:model.layers.23.mlp.down_proj.weight] + tensor.CPU.register () -> (%1154:tensor<[2048], Float32, CPU>[@model.layers.24.input_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=883), symbol:model.layers.24.input_layernorm.weight])[symbol:model.layers.24.input_layernorm.weight] + tensor.CPU.register () -> (%2384:tensor<[2048, 2048], Float32, CPU>[@model.layers.24.self_attn.q_proj.weight][symbol:model.layers.24.self_attn.q_proj.weight])[symbol:model.layers.24.self_attn.q_proj.weight] + tensor.CPU.register () -> (%2620:tensor<[1024, 2048], Float32, CPU>[@model.layers.24.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=884), symbol:model.layers.24.self_attn.k_proj.weight])[symbol:model.layers.24.self_attn.k_proj.weight] + tensor.CPU.register () -> (%3265:tensor<[1024, 2048], Float32, CPU>[@model.layers.24.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, 
scale_1_type: Float32), uuid=886), symbol:model.layers.24.self_attn.v_proj.weight])[symbol:model.layers.24.self_attn.v_proj.weight] + tensor.CPU.register () -> (%2985:tensor<[128], Float32, CPU>[@model.layers.24.self_attn.q_norm.weight][quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=890), symbol:model.layers.24.self_attn.q_norm.weight])[symbol:model.layers.24.self_attn.q_norm.weight] + tensor.CPU.register () -> (%3894:tensor<[128], Float32, CPU>[@model.layers.24.self_attn.k_norm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=892), symbol:model.layers.24.self_attn.k_norm.weight])[symbol:model.layers.24.self_attn.k_norm.weight] + tensor.CPU.register () -> (%7488:tensor<[2048, 2048], Float32, CPU>[@model.layers.24.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=905), symbol:model.layers.24.self_attn.o_proj.weight])[symbol:model.layers.24.self_attn.o_proj.weight] + tensor.CPU.register () -> (%6713:tensor<[2048], Float32, CPU>[@model.layers.24.post_attention_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=908), symbol:model.layers.24.post_attention_layernorm.weight])[symbol:model.layers.24.post_attention_layernorm.weight] + tensor.CPU.register () -> (%1336:tensor<[6144, 2048], Float32, CPU>[@model.layers.24.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=909), symbol:model.layers.24.mlp.gate_proj.weight])[symbol:model.layers.24.mlp.gate_proj.weight] + tensor.CPU.register () -> (%7035:tensor<[6144, 2048], Float32, CPU>[@model.layers.24.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, 
block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=912), symbol:model.layers.24.mlp.up_proj.weight])[symbol:model.layers.24.mlp.up_proj.weight] + tensor.CPU.register () -> (%7069:tensor<[2048, 6144], Float32, CPU>[@model.layers.24.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=914), symbol:model.layers.24.mlp.down_proj.weight])[symbol:model.layers.24.mlp.down_proj.weight] + tensor.CPU.register () -> (%6496:tensor<[2048], Float32, CPU>[@model.layers.25.input_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=917), symbol:model.layers.25.input_layernorm.weight])[symbol:model.layers.25.input_layernorm.weight] + tensor.CPU.register () -> (%1852:tensor<[2048, 2048], Float32, CPU>[@model.layers.25.self_attn.q_proj.weight][symbol:model.layers.25.self_attn.q_proj.weight])[symbol:model.layers.25.self_attn.q_proj.weight] + tensor.CPU.register () -> (%3615:tensor<[1024, 2048], Float32, CPU>[@model.layers.25.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=918), symbol:model.layers.25.self_attn.k_proj.weight])[symbol:model.layers.25.self_attn.k_proj.weight] + tensor.CPU.register () -> (%2014:tensor<[1024, 2048], Float32, CPU>[@model.layers.25.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=920), symbol:model.layers.25.self_attn.v_proj.weight])[symbol:model.layers.25.self_attn.v_proj.weight] + tensor.CPU.register () -> (%2021:tensor<[128], Float32, 
CPU>[@model.layers.25.self_attn.q_norm.weight][quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=924), symbol:model.layers.25.self_attn.q_norm.weight])[symbol:model.layers.25.self_attn.q_norm.weight] + tensor.CPU.register () -> (%1413:tensor<[128], Float32, CPU>[@model.layers.25.self_attn.k_norm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=926), symbol:model.layers.25.self_attn.k_norm.weight])[symbol:model.layers.25.self_attn.k_norm.weight] + tensor.CPU.register () -> (%7074:tensor<[2048, 2048], Float32, CPU>[@model.layers.25.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=939), symbol:model.layers.25.self_attn.o_proj.weight])[symbol:model.layers.25.self_attn.o_proj.weight] + tensor.CPU.register () -> (%6424:tensor<[2048], Float32, CPU>[@model.layers.25.post_attention_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=942), symbol:model.layers.25.post_attention_layernorm.weight])[symbol:model.layers.25.post_attention_layernorm.weight] + tensor.CPU.register () -> (%1860:tensor<[6144, 2048], Float32, CPU>[@model.layers.25.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=943), symbol:model.layers.25.mlp.gate_proj.weight])[symbol:model.layers.25.mlp.gate_proj.weight] + tensor.CPU.register () -> (%5840:tensor<[6144, 2048], Float32, CPU>[@model.layers.25.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=946), 
symbol:model.layers.25.mlp.up_proj.weight])[symbol:model.layers.25.mlp.up_proj.weight] + tensor.CPU.register () -> (%6869:tensor<[2048, 6144], Float32, CPU>[@model.layers.25.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=948), symbol:model.layers.25.mlp.down_proj.weight])[symbol:model.layers.25.mlp.down_proj.weight] + tensor.CPU.register () -> (%611:tensor<[2048], Float32, CPU>[@model.layers.26.input_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=951), symbol:model.layers.26.input_layernorm.weight])[symbol:model.layers.26.input_layernorm.weight] + tensor.CPU.register () -> (%1040:tensor<[2048, 2048], Float32, CPU>[@model.layers.26.self_attn.q_proj.weight][symbol:model.layers.26.self_attn.q_proj.weight])[symbol:model.layers.26.self_attn.q_proj.weight] + tensor.CPU.register () -> (%2312:tensor<[1024, 2048], Float32, CPU>[@model.layers.26.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=952), symbol:model.layers.26.self_attn.k_proj.weight])[symbol:model.layers.26.self_attn.k_proj.weight] + tensor.CPU.register () -> (%174:tensor<[1024, 2048], Float32, CPU>[@model.layers.26.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=954), symbol:model.layers.26.self_attn.v_proj.weight])[symbol:model.layers.26.self_attn.v_proj.weight] + tensor.CPU.register () -> (%2799:tensor<[128], Float32, CPU>[@model.layers.26.self_attn.q_norm.weight][quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=958), 
symbol:model.layers.26.self_attn.q_norm.weight])[symbol:model.layers.26.self_attn.q_norm.weight] + tensor.CPU.register () -> (%6479:tensor<[128], Float32, CPU>[@model.layers.26.self_attn.k_norm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=960), symbol:model.layers.26.self_attn.k_norm.weight])[symbol:model.layers.26.self_attn.k_norm.weight] + tensor.CPU.register () -> (%504:tensor<[2048, 2048], Float32, CPU>[@model.layers.26.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=973), symbol:model.layers.26.self_attn.o_proj.weight])[symbol:model.layers.26.self_attn.o_proj.weight] + tensor.CPU.register () -> (%5096:tensor<[2048], Float32, CPU>[@model.layers.26.post_attention_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=976), symbol:model.layers.26.post_attention_layernorm.weight])[symbol:model.layers.26.post_attention_layernorm.weight] + tensor.CPU.register () -> (%4867:tensor<[6144, 2048], Float32, CPU>[@model.layers.26.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=977), symbol:model.layers.26.mlp.gate_proj.weight])[symbol:model.layers.26.mlp.gate_proj.weight] + tensor.CPU.register () -> (%2619:tensor<[6144, 2048], Float32, CPU>[@model.layers.26.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=980), symbol:model.layers.26.mlp.up_proj.weight])[symbol:model.layers.26.mlp.up_proj.weight] + tensor.CPU.register () -> (%1355:tensor<[2048, 6144], Float32, 
CPU>[@model.layers.26.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=982), symbol:model.layers.26.mlp.down_proj.weight])[symbol:model.layers.26.mlp.down_proj.weight] + tensor.CPU.register () -> (%6381:tensor<[2048], Float32, CPU>[@model.layers.27.input_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=985), symbol:model.layers.27.input_layernorm.weight])[symbol:model.layers.27.input_layernorm.weight] + tensor.CPU.register () -> (%5946:tensor<[2048, 2048], Float32, CPU>[@model.layers.27.self_attn.q_proj.weight][symbol:model.layers.27.self_attn.q_proj.weight])[symbol:model.layers.27.self_attn.q_proj.weight] + tensor.CPU.register () -> (%1802:tensor<[1024, 2048], Float32, CPU>[@model.layers.27.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=986), symbol:model.layers.27.self_attn.k_proj.weight])[symbol:model.layers.27.self_attn.k_proj.weight] + tensor.CPU.register () -> (%6652:tensor<[1024, 2048], Float32, CPU>[@model.layers.27.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=988), symbol:model.layers.27.self_attn.v_proj.weight])[symbol:model.layers.27.self_attn.v_proj.weight] + tensor.CPU.register () -> (%6206:tensor<[128], Float32, CPU>[@model.layers.27.self_attn.q_norm.weight][quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=992), symbol:model.layers.27.self_attn.q_norm.weight])[symbol:model.layers.27.self_attn.q_norm.weight] + tensor.CPU.register () -> (%1743:tensor<[128], Float32, 
CPU>[@model.layers.27.self_attn.k_norm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=994), symbol:model.layers.27.self_attn.k_norm.weight])[symbol:model.layers.27.self_attn.k_norm.weight] + tensor.CPU.register () -> (%5189:tensor<[2048, 2048], Float32, CPU>[@model.layers.27.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1007), symbol:model.layers.27.self_attn.o_proj.weight])[symbol:model.layers.27.self_attn.o_proj.weight] + tensor.CPU.register () -> (%3001:tensor<[2048], Float32, CPU>[@model.layers.27.post_attention_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1010), symbol:model.layers.27.post_attention_layernorm.weight])[symbol:model.layers.27.post_attention_layernorm.weight] + tensor.CPU.register () -> (%5561:tensor<[6144, 2048], Float32, CPU>[@model.layers.27.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1011), symbol:model.layers.27.mlp.gate_proj.weight])[symbol:model.layers.27.mlp.gate_proj.weight] + tensor.CPU.register () -> (%2731:tensor<[6144, 2048], Float32, CPU>[@model.layers.27.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1014), symbol:model.layers.27.mlp.up_proj.weight])[symbol:model.layers.27.mlp.up_proj.weight] + tensor.CPU.register () -> (%3783:tensor<[2048, 6144], Float32, CPU>[@model.layers.27.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, 
scale_1_type: Float32), uuid=1016), symbol:model.layers.27.mlp.down_proj.weight])[symbol:model.layers.27.mlp.down_proj.weight] + tensor.CPU.register () -> (%5765:tensor<[2048], Float32, CPU>[@model.norm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1019), symbol:model.norm.weight])[symbol:model.norm.weight] + tensor.CPU.register () -> (%6130:tensor<[151936, 2048], Float32, CPU>[@lm_head.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1020), symbol:lm_head.weight])[symbol:lm_head.weight] } } graph.SubGraphOp @deinit [symbol:deinit] { @@ -319,1697 +321,1697 @@ } } - graph.CallGraphOp @model (%318:tensor<[1, 32], Int64, CPU>[quant_recipe:QuantSpec(Raw(type: Int64), uuid=0)], %376:tensor<[1, 32], Int64, CPU>[quant_recipe:QuantSpec(Raw(type: Int64), uuid=1)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %320:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=3)], %322:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=4)], %324:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=5)], %326:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=6)], %328:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=7)], %330:tensor<[1, 8, 128, 992], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=8)], %332:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=9)], %334:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=10)], %336:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=11)], %338:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=12)], %340:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=13)], %342:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=14)], %344:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=15)], %346:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=16)], %348:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=17)], %350:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=18)], %352:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, 
scale_type: Float32), uuid=19)], %354:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=20)], %356:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=21)], %358:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=22)], %360:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=23)], %362:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=24)], %364:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=25)], %366:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=26)], %368:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=27)], %370:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=28)], %372:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=29)], %374:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=30)], %321:tensor<[1, 8, 992, 128], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=31)], %323:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=32)], %325:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=33)], %327:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=34)], %329:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=35)], %331:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=36)], %333:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=37)], %335:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=38)], %337:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=39)], %339:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=40)], %341:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=41)], %343:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, 
scale_type: Float32), uuid=42)], %345:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=43)], %347:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=44)], %349:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=45)], %351:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=46)], %353:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=47)], %355:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=48)], %357:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=49)], %359:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=50)], %361:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=51)], %363:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=52)], %365:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=53)], %367:tensor<[1, 8, 992, 128], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=54)], %369:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=55)], %371:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=56)], %373:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=57)], %375:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=58)]) -> (%1530:tensor<[1, 32, 151936], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1020)], %394:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=77)], %435:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=111)], %476:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=145)], %517:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=179)], %558:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=213)], %599:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, 
quant_to_type: Int8, scale_type: Float32), uuid=247)], %640:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=281)], %681:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=315)], %722:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=349)], %763:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=383)], %804:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=417)], %845:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=451)], %886:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=485)], %927:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=519)], %968:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=553)], %1009:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=587)], %1050:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=621)], %1091:tensor<[1, 8, 128, 32], 
Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=655)], %1132:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=689)], %1173:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=723)], %1214:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=757)], %1255:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=791)], %1296:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=825)], %1337:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=859)], %1378:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=893)], %1419:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=927)], %1460:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=961)], %1501:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=995)], %396:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 
127, quant_to_type: Int8, scale_type: Float32), uuid=79)], %437:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=113)], %478:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=147)], %519:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=181)], %560:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=215)], %601:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=249)], %642:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=283)], %683:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=317)], %724:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=351)], %765:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=385)], %806:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=419)], %847:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=453)], %888:tensor<[1, 8, 32, 128], 
Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=487)], %929:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=521)], %970:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=555)], %1011:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=589)], %1052:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=623)], %1093:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=657)], %1134:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=691)], %1175:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=725)], %1216:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=759)], %1257:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=793)], %1298:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=827)], %1339:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 
127, quant_to_type: Int8, scale_type: Float32), uuid=861)], %1380:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=895)], %1421:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=929)], %1462:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=963)], %1503:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=997)]) + graph.CallGraphOp @model (%8013:tensor<[1, 32], Int32, CPU>[quant_recipe:QuantSpec(Raw(type: Int32), uuid=0)], %8071:tensor<[1, 32], Int64, CPU>[quant_recipe:QuantSpec(Raw(type: Int64), uuid=1)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8015:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=3)], %8017:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=4)], %8019:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=5)], %8021:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=6)], %8023:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=7)], %8025:tensor<[1, 8, 128, 992], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=8)], %8027:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=9)], %8029:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=10)], %8031:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=11)], %8033:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=12)], %8035:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=13)], %8037:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=14)], %8039:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=15)], %8041:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=16)], %8043:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=17)], %8045:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=18)], %8047:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, 
quant_to_type: Int8, scale_type: Float32), uuid=19)], %8049:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=20)], %8051:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=21)], %8053:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=22)], %8055:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=23)], %8057:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=24)], %8059:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=25)], %8061:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=26)], %8063:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=27)], %8065:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=28)], %8067:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=29)], %8069:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=30)], %8016:tensor<[1, 8, 992, 128], 
Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=31)], %8018:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=32)], %8020:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=33)], %8022:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=34)], %8024:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=35)], %8026:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=36)], %8028:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=37)], %8030:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=38)], %8032:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=39)], %8034:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=40)], %8036:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=41)], %8038:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 
127, quant_to_type: Int8, scale_type: Float32), uuid=42)], %8040:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=43)], %8042:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=44)], %8044:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=45)], %8046:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=46)], %8048:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=47)], %8050:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=48)], %8052:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=49)], %8054:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=50)], %8056:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=51)], %8058:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=52)], %8060:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=53)], %8062:tensor<[1, 8, 992, 
128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=54)], %8064:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=55)], %8066:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=56)], %8068:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=57)], %8070:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=58)]) -> (%9225:tensor<[1, 32, 151936], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1021)], %8089:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=78)], %8130:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=112)], %8171:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=146)], %8212:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=180)], %8253:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=214)], %8294:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 
-128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=248)], %8335:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=282)], %8376:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=316)], %8417:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=350)], %8458:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=384)], %8499:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=418)], %8540:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=452)], %8581:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=486)], %8622:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=520)], %8663:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=554)], %8704:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=588)], %8745:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=622)], 
%8786:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=656)], %8827:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=690)], %8868:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=724)], %8909:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=758)], %8950:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=792)], %8991:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=826)], %9032:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=860)], %9073:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=894)], %9114:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=928)], %9155:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=962)], %9196:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=996)], %8091:tensor<[1, 8, 32, 128], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=80)], %8132:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=114)], %8173:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=148)], %8214:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=182)], %8255:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=216)], %8296:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=250)], %8337:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=284)], %8378:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=318)], %8419:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=352)], %8460:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=386)], %8501:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=420)], %8542:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, 
quant_to_type: Int8, scale_type: Float32), uuid=454)], %8583:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=488)], %8624:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=522)], %8665:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=556)], %8706:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=590)], %8747:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=624)], %8788:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=658)], %8829:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=692)], %8870:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=726)], %8911:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=760)], %8952:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=794)], %8993:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=828)], %9034:tensor<[1, 8, 32, 128], 
Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=862)], %9075:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=896)], %9116:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=930)], %9157:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=964)], %9198:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=998)]) graph.SubGraphOp @model [using_qnn:true, symbol:model] { - (%318:tensor<[1, 32], Int64, CPU>[quant_recipe:QuantSpec(Raw(type: Int64), uuid=0)], %376:tensor<[1, 32], Int64, CPU>[quant_recipe:QuantSpec(Raw(type: Int64), uuid=1)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %320:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=3)], %322:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=4)], %324:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=5)], %326:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=6)], %328:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, 
scale_type: Float32), uuid=7)], %330:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=8)], %332:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=9)], %334:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=10)], %336:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=11)], %338:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=12)], %340:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=13)], %342:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=14)], %344:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=15)], %346:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=16)], %348:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=17)], %350:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=18)], %352:tensor<[1, 8, 128, 992], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=19)], %354:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=20)], %356:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=21)], %358:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=22)], %360:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=23)], %362:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=24)], %364:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=25)], %366:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=26)], %368:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=27)], %370:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=28)], %372:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=29)], %374:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, 
scale_type: Float32), uuid=30)], %321:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=31)], %323:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=32)], %325:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=33)], %327:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=34)], %329:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=35)], %331:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=36)], %333:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=37)], %335:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=38)], %337:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=39)], %339:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=40)], %341:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=41)], %343:tensor<[1, 8, 992, 128], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=42)], %345:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=43)], %347:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=44)], %349:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=45)], %351:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=46)], %353:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=47)], %355:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=48)], %357:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=49)], %359:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=50)], %361:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=51)], %363:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=52)], %365:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, 
scale_type: Float32), uuid=53)], %367:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=54)], %369:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=55)], %371:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=56)], %373:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=57)], %375:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=58)]) -> (%1530:tensor<[1, 32, 151936], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1020)], %394:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=77)], %435:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=111)], %476:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=145)], %517:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=179)], %558:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=213)], %599:tensor<[1, 8, 128, 32], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=247)], %640:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=281)], %681:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=315)], %722:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=349)], %763:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=383)], %804:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=417)], %845:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=451)], %886:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=485)], %927:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=519)], %968:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=553)], %1009:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=587)], %1050:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, 
scale_type: Float32), uuid=621)], %1091:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=655)], %1132:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=689)], %1173:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=723)], %1214:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=757)], %1255:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=791)], %1296:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=825)], %1337:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=859)], %1378:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=893)], %1419:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=927)], %1460:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=961)], %1501:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=995)], %396:tensor<[1, 8, 32, 128], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=79)], %437:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=113)], %478:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=147)], %519:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=181)], %560:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=215)], %601:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=249)], %642:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=283)], %683:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=317)], %724:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=351)], %765:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=385)], %806:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=419)], %847:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, 
scale_type: Float32), uuid=453)], %888:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=487)], %929:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=521)], %970:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=555)], %1011:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=589)], %1052:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=623)], %1093:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=657)], %1134:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=691)], %1175:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=725)], %1216:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=759)], %1257:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=793)], %1298:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=827)], %1339:tensor<[1, 8, 32, 128], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=861)], %1380:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=895)], %1421:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=929)], %1462:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=963)], %1503:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=997)]) { - linalg.CPU.EmbeddingOp (%318:tensor<[1, 32], Int64, CPU>[quant_recipe:QuantSpec(Raw(type: Int64), uuid=0)]) -> (%377:tensor<[1, 32, 2048], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=59)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float32), uuid=59), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=60), )] (%377:tensor<[1, 32, 2048], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=59)]) -> (%378:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=60)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int64), uuid=1), outputs_0:QuantSpec(Raw(type: Int64), uuid=1), )] (%376:tensor<[1, 32], Int64, CPU>[quant_recipe:QuantSpec(Raw(type: Int64), uuid=1)]) -> (%376:tensor<[32], Int64, CPU>[quant_recipe:QuantSpec(Raw(type: Int64), uuid=1)]) - linalg.CPU.IndexOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, 
scale_type: Float32), uuid=61), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), )] (%316:tensor<[1, 1024, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=61)]) -> (%379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)]) - linalg.CPU.IndexOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), )] (%317:tensor<[1, 1024, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)]) -> (%380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) - graph.CallGraphOp @model.layers.0 (%378:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=60)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %320:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), 
uuid=3)], %321:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=31)]) -> (%421:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=98)], %394:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=77)], %396:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=79)]) - graph.CallGraphOp @model.layers.1 (%421:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=98)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %322:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=4)], %323:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=32)]) -> (%462:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=132)], %435:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, 
quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=111)], %437:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=113)]) - graph.CallGraphOp @model.layers.2 (%462:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=132)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %324:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=5)], %325:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=33)]) -> (%503:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=166)], %476:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=145)], %478:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=147)]) - graph.CallGraphOp @model.layers.3 (%503:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=166)], 
%379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %326:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=6)], %327:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=34)]) -> (%544:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=200)], %517:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=179)], %519:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=181)]) - graph.CallGraphOp @model.layers.4 (%544:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=200)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %328:tensor<[1, 8, 128, 
992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=7)], %329:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=35)]) -> (%585:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=234)], %558:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=213)], %560:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=215)]) - graph.CallGraphOp @model.layers.5 (%585:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=234)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %330:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=8)], %331:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=36)]) -> (%626:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, 
scale_type: Float32), uuid=268)], %599:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=247)], %601:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=249)]) - graph.CallGraphOp @model.layers.6 (%626:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=268)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %332:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=9)], %333:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=37)]) -> (%667:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=302)], %640:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=281)], %642:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=283)]) - graph.CallGraphOp @model.layers.7 (%667:tensor<[1, 32, 2048], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=302)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %334:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=10)], %335:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=38)]) -> (%708:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=336)], %681:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=315)], %683:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=317)]) - graph.CallGraphOp @model.layers.8 (%708:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=336)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: 
Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %336:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=11)], %337:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=39)]) -> (%749:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=370)], %722:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=349)], %724:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=351)]) - graph.CallGraphOp @model.layers.9 (%749:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=370)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %338:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=12)], %339:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=40)]) -> 
(%790:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=404)], %763:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=383)], %765:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=385)]) - graph.CallGraphOp @model.layers.10 (%790:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=404)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %340:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=13)], %341:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=41)]) -> (%831:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=438)], %804:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=417)], %806:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 
127, quant_to_type: Int8, scale_type: Float32), uuid=419)]) - graph.CallGraphOp @model.layers.11 (%831:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=438)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %342:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=14)], %343:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=42)]) -> (%872:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=472)], %845:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=451)], %847:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=453)]) - graph.CallGraphOp @model.layers.12 (%872:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=472)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 
32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %344:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=15)], %345:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=43)]) -> (%913:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=506)], %886:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=485)], %888:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=487)]) - graph.CallGraphOp @model.layers.13 (%913:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=506)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %346:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=16)], %347:tensor<[1, 8, 992, 128], 
Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=44)]) -> (%954:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=540)], %927:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=519)], %929:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=521)]) - graph.CallGraphOp @model.layers.14 (%954:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=540)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %348:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=17)], %349:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=45)]) -> (%995:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=574)], %968:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, 
scale_type: Float32), uuid=553)], %970:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=555)]) - graph.CallGraphOp @model.layers.15 (%995:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=574)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %350:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=18)], %351:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=46)]) -> (%1036:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=608)], %1009:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=587)], %1011:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=589)]) - graph.CallGraphOp @model.layers.16 (%1036:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=608)], %379:tensor<[1, 32, 128], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %352:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=19)], %353:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=47)]) -> (%1077:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=642)], %1050:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=621)], %1052:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=623)]) - graph.CallGraphOp @model.layers.17 (%1077:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=642)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %354:tensor<[1, 8, 128, 992], 
Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=20)], %355:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=48)]) -> (%1118:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=676)], %1091:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=655)], %1093:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=657)]) - graph.CallGraphOp @model.layers.18 (%1118:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=676)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %356:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=21)], %357:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=49)]) -> (%1159:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: 
Int16, scale_type: Float32), uuid=710)], %1132:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=689)], %1134:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=691)]) - graph.CallGraphOp @model.layers.19 (%1159:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=710)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %358:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=22)], %359:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=50)]) -> (%1200:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=744)], %1173:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=723)], %1175:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=725)]) - graph.CallGraphOp @model.layers.20 (%1200:tensor<[1, 32, 2048], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=744)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %360:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=23)], %361:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=51)]) -> (%1241:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=778)], %1214:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=757)], %1216:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=759)]) - graph.CallGraphOp @model.layers.21 (%1241:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=778)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: 
Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %362:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=24)], %363:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=52)]) -> (%1282:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=812)], %1255:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=791)], %1257:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=793)]) - graph.CallGraphOp @model.layers.22 (%1282:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=812)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %364:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=25)], %365:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: 
Float32), uuid=53)]) -> (%1323:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=846)], %1296:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=825)], %1298:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=827)]) - graph.CallGraphOp @model.layers.23 (%1323:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=846)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %366:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=26)], %367:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=54)]) -> (%1364:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=880)], %1337:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=859)], %1339:tensor<[1, 8, 32, 128], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=861)]) - graph.CallGraphOp @model.layers.24 (%1364:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=880)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %368:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=27)], %369:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=55)]) -> (%1405:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=914)], %1378:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=893)], %1380:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=895)]) - graph.CallGraphOp @model.layers.25 (%1405:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=914)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 
32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %370:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=28)], %371:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=56)]) -> (%1446:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=948)], %1419:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=927)], %1421:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=929)]) - graph.CallGraphOp @model.layers.26 (%1446:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=948)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %372:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, 
quant_to_type: Int8, scale_type: Float32), uuid=29)], %373:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=57)]) -> (%1487:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=982)], %1460:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=961)], %1462:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=963)]) - graph.CallGraphOp @model.layers.27 (%1487:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=982)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %374:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=30)], %375:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=58)]) -> (%1528:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1016)], %1501:tensor<[1, 8, 128, 32], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=995)], %1503:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=997)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1016), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1017), )] (%1528:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1016)]) -> (%1529:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1017)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1017), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1020), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1019)), using_qnn:true] (%1529:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1017)]) -> (%1530:tensor<[1, 32, 151936], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1020)]) - cf.ReturnOp (%1530:tensor<[1, 32, 151936], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, 
scale_type: Float32), uuid=1020)], %394:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=77)], %435:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=111)], %476:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=145)], %517:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=179)], %558:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=213)], %599:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=247)], %640:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=281)], %681:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=315)], %722:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=349)], %763:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=383)], %804:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=417)], %845:tensor<[1, 8, 128, 32], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=451)], %886:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=485)], %927:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=519)], %968:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=553)], %1009:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=587)], %1050:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=621)], %1091:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=655)], %1132:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=689)], %1173:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=723)], %1214:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=757)], %1255:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=791)], %1296:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: 
Int8, scale_type: Float32), uuid=825)], %1337:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=859)], %1378:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=893)], %1419:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=927)], %1460:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=961)], %1501:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=995)], %396:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=79)], %437:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=113)], %478:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=147)], %519:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=181)], %560:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=215)], %601:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=249)], %642:tensor<[1, 8, 32, 128], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=283)], %683:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=317)], %724:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=351)], %765:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=385)], %806:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=419)], %847:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=453)], %888:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=487)], %929:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=521)], %970:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=555)], %1011:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=589)], %1052:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=623)], %1093:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: 
Int8, scale_type: Float32), uuid=657)], %1134:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=691)], %1175:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=725)], %1216:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=759)], %1257:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=793)], %1298:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=827)], %1339:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=861)], %1380:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=895)], %1421:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=929)], %1462:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=963)], %1503:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=997)]) -> () + (%8013:tensor<[1, 32], Int32, CPU>[quant_recipe:QuantSpec(Raw(type: Int32), uuid=0)], %8071:tensor<[1, 32], Int64, CPU>[quant_recipe:QuantSpec(Raw(type: Int64), uuid=1)], %8014:tensor<[1, 1, 32, 1024], UInt16, 
CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8015:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=3)], %8017:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=4)], %8019:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=5)], %8021:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=6)], %8023:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=7)], %8025:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=8)], %8027:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=9)], %8029:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=10)], %8031:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=11)], %8033:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=12)], %8035:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=13)], %8037:tensor<[1, 8, 128, 992], 
Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=14)], %8039:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=15)], %8041:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=16)], %8043:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=17)], %8045:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=18)], %8047:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=19)], %8049:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=20)], %8051:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=21)], %8053:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=22)], %8055:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=23)], %8057:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=24)], %8059:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 
127, quant_to_type: Int8, scale_type: Float32), uuid=25)], %8061:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=26)], %8063:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=27)], %8065:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=28)], %8067:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=29)], %8069:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=30)], %8016:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=31)], %8018:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=32)], %8020:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=33)], %8022:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=34)], %8024:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=35)], %8026:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=36)], %8028:tensor<[1, 8, 992, 
128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=37)], %8030:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=38)], %8032:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=39)], %8034:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=40)], %8036:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=41)], %8038:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=42)], %8040:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=43)], %8042:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=44)], %8044:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=45)], %8046:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=46)], %8048:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=47)], %8050:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, 
quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=48)], %8052:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=49)], %8054:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=50)], %8056:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=51)], %8058:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=52)], %8060:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=53)], %8062:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=54)], %8064:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=55)], %8066:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=56)], %8068:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=57)], %8070:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=58)]) -> (%9225:tensor<[1, 32, 151936], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1021)], 
%8089:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=78)], %8130:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=112)], %8171:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=146)], %8212:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=180)], %8253:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=214)], %8294:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=248)], %8335:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=282)], %8376:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=316)], %8417:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=350)], %8458:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=384)], %8499:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=418)], %8540:tensor<[1, 8, 128, 32], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=452)], %8581:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=486)], %8622:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=520)], %8663:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=554)], %8704:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=588)], %8745:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=622)], %8786:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=656)], %8827:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=690)], %8868:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=724)], %8909:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=758)], %8950:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=792)], %8991:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, 
quant_to_type: Int8, scale_type: Float32), uuid=826)], %9032:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=860)], %9073:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=894)], %9114:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=928)], %9155:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=962)], %9196:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=996)], %8091:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=80)], %8132:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=114)], %8173:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=148)], %8214:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=182)], %8255:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=216)], %8296:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=250)], %8337:tensor<[1, 8, 32, 128], 
Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=284)], %8378:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=318)], %8419:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=352)], %8460:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=386)], %8501:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=420)], %8542:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=454)], %8583:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=488)], %8624:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=522)], %8665:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=556)], %8706:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=590)], %8747:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=624)], %8788:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 
127, quant_to_type: Int8, scale_type: Float32), uuid=658)], %8829:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=692)], %8870:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=726)], %8911:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=760)], %8952:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=794)], %8993:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=828)], %9034:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=862)], %9075:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=896)], %9116:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=930)], %9157:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=964)], %9198:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=998)]) { + linalg.CPU.EmbeddingOp (%8013:tensor<[1, 32], Int32, CPU>[quant_recipe:QuantSpec(Raw(type: Int32), uuid=0)]) -> (%8072:tensor<[1, 32, 2048], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: 
Float32), uuid=59)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float32), uuid=59), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=60), weight_weight:QuantSpec(Raw(type: Float32), uuid=61))] (%8072:tensor<[1, 32, 2048], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=59)]) -> (%8073:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=60)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int64), uuid=1), outputs_0:QuantSpec(Raw(type: Int64), uuid=1), )] (%8071:tensor<[1, 32], Int64, CPU>[quant_recipe:QuantSpec(Raw(type: Int64), uuid=1)]) -> (%8071:tensor<[32], Int64, CPU>[quant_recipe:QuantSpec(Raw(type: Int64), uuid=1)]) + linalg.CPU.IndexOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), )] (%8011:tensor<[1, 1024, 128], Int16PerTensor, CPU>[@rope_sin][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), symbol:rope_sin]) -> (%8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)]) + linalg.CPU.IndexOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), )] (%8012:tensor<[1, 1024, 128], Int16PerTensor, CPU>[@rope_cos][quant_recipe:QuantSpec(SymPerTensor(quant_min: 
-32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), symbol:rope_cos]) -> (%8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) + graph.CallGraphOp @model.layers.0 (%8073:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=60)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8015:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=3)], %8016:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=31)]) -> (%8116:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=99)], %8089:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=78)], %8091:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=80)]) + graph.CallGraphOp @model.layers.1 (%8116:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, 
scale_type: Float32), uuid=99)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8017:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=4)], %8018:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=32)]) -> (%8157:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=133)], %8130:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=112)], %8132:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=114)]) + graph.CallGraphOp @model.layers.2 (%8157:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=133)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, 
CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8019:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=5)], %8020:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=33)]) -> (%8198:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=167)], %8171:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=146)], %8173:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=148)]) + graph.CallGraphOp @model.layers.3 (%8198:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=167)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8021:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=6)], %8022:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=34)]) -> (%8239:tensor<[1, 32, 2048], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=201)], %8212:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=180)], %8214:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=182)]) + graph.CallGraphOp @model.layers.4 (%8239:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=201)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8023:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=7)], %8024:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=35)]) -> (%8280:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=235)], %8253:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=214)], %8255:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: 
Float32), uuid=216)]) + graph.CallGraphOp @model.layers.5 (%8280:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=235)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8025:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=8)], %8026:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=36)]) -> (%8321:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=269)], %8294:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=248)], %8296:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=250)]) + graph.CallGraphOp @model.layers.6 (%8321:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=269)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8027:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=9)], %8028:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=37)]) -> (%8362:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=303)], %8335:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=282)], %8337:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=284)]) + graph.CallGraphOp @model.layers.7 (%8362:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=303)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8029:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=10)], %8030:tensor<[1, 8, 992, 128], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=38)]) -> (%8403:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=337)], %8376:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=316)], %8378:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=318)]) + graph.CallGraphOp @model.layers.8 (%8403:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=337)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8031:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=11)], %8032:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=39)]) -> (%8444:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=371)], %8417:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, 
scale_type: Float32), uuid=350)], %8419:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=352)]) + graph.CallGraphOp @model.layers.9 (%8444:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=371)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8033:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=12)], %8034:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=40)]) -> (%8485:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=405)], %8458:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=384)], %8460:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=386)]) + graph.CallGraphOp @model.layers.10 (%8485:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=405)], %8074:tensor<[1, 32, 128], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8035:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=13)], %8036:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=41)]) -> (%8526:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=439)], %8499:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=418)], %8501:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=420)]) + graph.CallGraphOp @model.layers.11 (%8526:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=439)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8037:tensor<[1, 8, 128, 992], 
Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=14)], %8038:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=42)]) -> (%8567:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=473)], %8540:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=452)], %8542:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=454)]) + graph.CallGraphOp @model.layers.12 (%8567:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=473)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8039:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=15)], %8040:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=43)]) -> (%8608:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: 
Int16, scale_type: Float32), uuid=507)], %8581:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=486)], %8583:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=488)]) + graph.CallGraphOp @model.layers.13 (%8608:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=507)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8041:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=16)], %8042:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=44)]) -> (%8649:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=541)], %8622:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=520)], %8624:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=522)]) + graph.CallGraphOp @model.layers.14 (%8649:tensor<[1, 32, 2048], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=541)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8043:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=17)], %8044:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=45)]) -> (%8690:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=575)], %8663:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=554)], %8665:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=556)]) + graph.CallGraphOp @model.layers.15 (%8690:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=575)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8045:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=18)], %8046:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=46)]) -> (%8731:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=609)], %8704:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=588)], %8706:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=590)]) + graph.CallGraphOp @model.layers.16 (%8731:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=609)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8047:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=19)], %8048:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, 
quant_to_type: Int8, scale_type: Float32), uuid=47)]) -> (%8772:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=643)], %8745:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=622)], %8747:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=624)]) + graph.CallGraphOp @model.layers.17 (%8772:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=643)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8049:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=20)], %8050:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=48)]) -> (%8813:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=677)], %8786:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=656)], %8788:tensor<[1, 8, 32, 128], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=658)]) + graph.CallGraphOp @model.layers.18 (%8813:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=677)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8051:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=21)], %8052:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=49)]) -> (%8854:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=711)], %8827:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=690)], %8829:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=692)]) + graph.CallGraphOp @model.layers.19 (%8854:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=711)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8053:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=22)], %8054:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=50)]) -> (%8895:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=745)], %8868:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=724)], %8870:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=726)]) + graph.CallGraphOp @model.layers.20 (%8895:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=745)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8055:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, 
quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=23)], %8056:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=51)]) -> (%8936:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=779)], %8909:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=758)], %8911:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=760)]) + graph.CallGraphOp @model.layers.21 (%8936:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=779)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8057:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=24)], %8058:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=52)]) -> (%8977:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=813)], %8950:tensor<[1, 8, 128, 32], 
Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=792)], %8952:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=794)]) + graph.CallGraphOp @model.layers.22 (%8977:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=813)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8059:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=25)], %8060:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=53)]) -> (%9018:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=847)], %8991:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=826)], %8993:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=828)]) + graph.CallGraphOp @model.layers.23 (%9018:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 
-32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=847)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8061:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=26)], %8062:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=54)]) -> (%9059:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=881)], %9032:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=860)], %9034:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=862)]) + graph.CallGraphOp @model.layers.24 (%9059:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=881)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 
1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8063:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=27)], %8064:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=55)]) -> (%9100:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=915)], %9073:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=894)], %9075:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=896)]) + graph.CallGraphOp @model.layers.25 (%9100:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=915)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8065:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=28)], %8066:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=56)]) -> (%9141:tensor<[1, 32, 2048], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=949)], %9114:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=928)], %9116:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=930)]) + graph.CallGraphOp @model.layers.26 (%9141:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=949)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8067:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=29)], %8068:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=57)]) -> (%9182:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=983)], %9155:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=962)], %9157:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: 
Int8, scale_type: Float32), uuid=964)]) + graph.CallGraphOp @model.layers.27 (%9182:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=983)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8069:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=30)], %8070:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=58)]) -> (%9223:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1017)], %9196:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=996)], %9198:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=998)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1017), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1018), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: 
Float32), uuid=1019))] (%9223:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1017)]) -> (%9224:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1018)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1018), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1021), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1020)), using_qnn:true] (%9224:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1018)]) -> (%9225:tensor<[1, 32, 151936], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1021)]) + cf.ReturnOp (%9225:tensor<[1, 32, 151936], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1021)], %8089:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=78)], %8130:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=112)], %8171:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=146)], %8212:tensor<[1, 8, 128, 32], 
Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=180)], %8253:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=214)], %8294:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=248)], %8335:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=282)], %8376:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=316)], %8417:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=350)], %8458:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=384)], %8499:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=418)], %8540:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=452)], %8581:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=486)], %8622:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=520)], %8663:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 
127, quant_to_type: Int8, scale_type: Float32), uuid=554)], %8704:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=588)], %8745:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=622)], %8786:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=656)], %8827:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=690)], %8868:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=724)], %8909:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=758)], %8950:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=792)], %8991:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=826)], %9032:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=860)], %9073:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=894)], %9114:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=928)], %9155:tensor<[1, 8, 128, 
32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=962)], %9196:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=996)], %8091:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=80)], %8132:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=114)], %8173:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=148)], %8214:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=182)], %8255:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=216)], %8296:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=250)], %8337:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=284)], %8378:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=318)], %8419:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=352)], %8460:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, 
quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=386)], %8501:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=420)], %8542:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=454)], %8583:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=488)], %8624:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=522)], %8665:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=556)], %8706:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=590)], %8747:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=624)], %8788:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=658)], %8829:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=692)], %8870:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=726)], %8911:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=760)], 
%8952:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=794)], %8993:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=828)], %9034:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=862)], %9075:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=896)], %9116:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=930)], %9157:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=964)], %9198:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=998)]) -> () } } graph.SubGraphOp @model.layers.0 [using_qnn:true, symbol:model.layers.0] { - (%378:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=60)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %320:tensor<[1, 8, 128, 992], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=3)], %321:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=31)]) -> (%421:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=98)], %394:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=77)], %396:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=79)]) { - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=60), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), )] (%378:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=60)]) -> (%381:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) - graph.CallGraphOp @model.layers.0.self_attn (%381:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 
32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %320:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=3)], %321:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=31)]) -> (%413:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=89)], %394:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=77)], %396:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=79)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=89), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=60), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=89), )] (%413:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=89)], %378:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=60)]) -> (%414:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=89)]) - 
linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=89), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=90), )] (%414:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=89)]) -> (%415:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=90)]) - graph.CallGraphOp @model.layers.0.mlp (%415:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=90)]) -> (%420:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=98)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=98), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=89), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=98), )] (%420:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=98)], %414:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=89)]) -> (%421:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=98)]) - cf.ReturnOp (%421:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=98)], %394:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=77)], %396:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=79)]) -> () + (%8073:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=60)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8015:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=3)], %8016:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=31)]) -> (%8116:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=99)], %8089:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=78)], %8091:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, 
quant_to_type: Int8, scale_type: Float32), uuid=80)]) { + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=60), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=66), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=67))] (%8073:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=60)]) -> (%8076:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=66)]) + graph.CallGraphOp @model.layers.0.self_attn (%8076:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=66)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8015:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=3)], %8016:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=31)]) -> (%8108:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=90)], %8089:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=78)], %8091:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=80)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=90), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=60), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=90), )] (%8108:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=90)], %8073:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=60)]) -> (%8109:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=90)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=90), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=91), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=92))] (%8109:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=90)]) -> (%8110:tensor<[1, 32, 2048], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=91)]) + graph.CallGraphOp @model.layers.0.mlp (%8110:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=91)]) -> (%8115:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=99)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=99), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=90), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=99), )] (%8115:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=99)], %8109:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=90)]) -> (%8116:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=99)]) + cf.ReturnOp (%8116:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=99)], %8089:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=78)], %8091:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, 
quant_to_type: Int8, scale_type: Float32), uuid=80)]) -> () } } graph.SubGraphOp @model.layers.0.self_attn [using_qnn:true, symbol:model.layers.0.self_attn] { - (%381:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %320:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=3)], %321:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=31)]) -> (%413:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=89)], %394:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=77)], %396:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=79)]) { - linalg.CPU.LinearOp (%381:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%382:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=71)]) - linalg.CPU.LinearOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=68), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=67))] (%381:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%383:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=68)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=70), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=69))] (%381:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%384:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=70)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=71), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=71), )] (%382:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=71)]) -> (%382:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: 
Int16PerTensor), uuid=71)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=71), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=71), )] (%382:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=71)]) -> (%385:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=71)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=68), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=68), )] (%383:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=68)]) -> (%383:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=68)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=68), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=68), )] (%383:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=68)]) -> (%386:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=68)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=70), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=70), )] (%384:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=70)]) -> (%384:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=70)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=70), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=70), )] (%384:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=70)]) -> (%387:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=70)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=71), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=72), )] (%385:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=71)]) -> (%388:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=72)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=68), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=74), )] (%386:tensor<[1, 8, 32, 128], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=68)]) -> (%389:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=74)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=72), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=72), )] (%388:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=72)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%390:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=72)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=74), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), 
outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=74), )] (%389:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=74)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%391:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=74)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=74), outputs_0:QuantSpec(Raw(type: Float16), uuid=76), )] (%391:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=74)]) -> (%392:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=76)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=76), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=77), )] (%392:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=76)]) -> (%393:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=77)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: 
Float32), uuid=77), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=77), )] (%393:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=77)]) -> (%394:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=77)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=70), outputs_0:QuantSpec(Raw(type: Float16), uuid=78), )] (%387:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=70)]) -> (%395:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=78)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=78), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=79), )] (%395:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=78)]) -> (%396:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=79)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=3), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=77), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=3), )] (%320:tensor<[1, 8, 128, 992], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=3)], %394:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=77)]) -> (%397:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=3)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=31), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=79), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=31), )] (%321:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=31)], %396:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=79)]) -> (%398:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=31)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=3), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=3), )] (%397:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=3)]) -> (%399:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, 
quant_to_type: Int8, scale_type: Float32), uuid=3)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=31), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=31), )] (%398:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=31)]) -> (%400:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=31)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=72), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=3), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=80), )] (%390:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=72)], %399:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=3)]) -> (%401:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=80)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=80), inputs_1:QuantSpec(Raw(type: Float32), uuid=81), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=80), )] (%401:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=80)], %402:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=81), constant:[0.088388346]]) -> (%403:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=80)]) - linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=80), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=82), )] (%403:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=80)]) -> (%404:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=82)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=82), inputs_1:QuantSpec(Raw(type: Int16), uuid=83), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=82), )] (%404:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=82)], %405:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=83), constant:[-20]]) -> (%406:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=82)]) - linalg.CPU.EqualOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=84), outputs_0:QuantSpec(Raw(type: UInt8), uuid=85), )] (%319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %407:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=84), constant:[0]]) -> (%408:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=85)]) - linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=85), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=80), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=82), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=82), )] (%408:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=85)], %403:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=80)], %406:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=82)]) -> (%409:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=82)]) - linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=82), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=86), )] (%409:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 
32767, quant_to_type: Int16, scale_type: Float32), uuid=82)]) -> (%410:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=86)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=86), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=31), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=87), )] (%410:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=86)], %400:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=31)]) -> (%411:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=87)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=87), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=87), )] (%411:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=87)]) -> (%412:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=87)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=87), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=87), )] (%412:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=87)]) -> (%412:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=87)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=87), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=89), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=88))] (%412:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=87)]) -> (%413:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=89)]) - cf.ReturnOp (%413:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=89)], %394:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=77)], %396:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=79)]) -> () + (%8076:tensor<[1, 32, 2048], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=66)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8015:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=3)], %8016:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=31)]) -> (%8108:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=90)], %8089:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=78)], %8091:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=80)]) { + linalg.CPU.LinearOp (%8076:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=66)]) -> (%8077:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=72)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=66), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=69), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=68))] (%8076:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=66)]) -> (%8078:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=69)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=66), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=71), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=70))] (%8076:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=66)]) -> (%8079:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=71)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=72), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=72), )] (%8077:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=72)]) -> (%8077:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=72)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=72), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=72), )] 
(%8077:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=72)]) -> (%8080:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=72)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=69), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=69), )] (%8078:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=69)]) -> (%8078:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=69)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=69), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=69), )] (%8078:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=69)]) -> (%8081:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=69)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=71), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=71), )] (%8079:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: 
Int16, scale_type: Float32), uuid=71)]) -> (%8079:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=71)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=71), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=71), )] (%8079:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=71)]) -> (%8082:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=71)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=72), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=73), weight_weight:QuantSpec(Raw(type: Int16PerTensor), uuid=74))] (%8080:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=72)]) -> (%8083:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=73)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=69), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=75), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=76))] (%8081:tensor<[1, 8, 32, 128], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=69)]) -> (%8084:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=75)]) + linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=73), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=73), )] (%8083:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=73)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%8085:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=73)]) + linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=75), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), 
outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=75), )] (%8084:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=75)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%8086:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=75)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=75), outputs_0:QuantSpec(Raw(type: Float16), uuid=77), )] (%8086:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=75)]) -> (%8087:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=77)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=77), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=78), )] (%8087:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=77)]) -> (%8088:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=78)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, 
scale_type: Float32), uuid=78), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=78), )] (%8088:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=78)]) -> (%8089:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=78)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=71), outputs_0:QuantSpec(Raw(type: Float16), uuid=79), )] (%8082:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=71)]) -> (%8090:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=79)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=79), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=80), )] (%8090:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=79)]) -> (%8091:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=80)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=3), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=78), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=3), )] (%8015:tensor<[1, 8, 128, 992], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=3)], %8089:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=78)]) -> (%8092:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=3)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=31), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=80), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=31), )] (%8016:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=31)], %8091:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=80)]) -> (%8093:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=31)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=3), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=3), )] (%8092:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=3)]) -> (%8094:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, 
quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=3)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=31), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=31), )] (%8093:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=31)]) -> (%8095:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=31)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=73), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=3), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=81), )] (%8085:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=73)], %8094:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=3)]) -> (%8096:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=81)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=81), inputs_1:QuantSpec(Raw(type: Float32), uuid=82), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, 
scale_type: Float32), uuid=81), )] (%8096:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=81)], %8097:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=82), constant:[0.088388346]]) -> (%8098:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=81)]) + linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=81), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=83), )] (%8098:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=81)]) -> (%8099:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=83)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=83), inputs_1:QuantSpec(Raw(type: Int16), uuid=84), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=83), )] (%8099:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=83)], %8100:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=84), constant:[-20]]) -> (%8101:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=83)]) + 
linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=85), outputs_0:QuantSpec(Raw(type: UInt8), uuid=86), )] (%8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8102:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=85), constant:[0]]) -> (%8103:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=86)]) + linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=86), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=81), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=83), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=83), )] (%8103:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=86)], %8098:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=81)], %8101:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=83)]) -> (%8104:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=83)]) + linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=83), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=87), )] (%8104:tensor<[1, 16, 32, 1024], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=83)]) -> (%8105:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=87)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=87), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=31), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=88), )] (%8105:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=87)], %8095:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=31)]) -> (%8106:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=88)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=88), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=88), )] (%8106:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=88)]) -> (%8107:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=88)]) + linalg.CPU.ViewOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=88), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=88), )] (%8107:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=88)]) -> (%8107:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=88)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=88), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=90), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=89))] (%8107:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=88)]) -> (%8108:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=90)]) + cf.ReturnOp (%8108:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=90)], %8089:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=78)], %8091:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), 
uuid=80)]) -> () } } graph.SubGraphOp @model.layers.0.mlp [using_qnn:true, symbol:model.layers.0.mlp] { - (%415:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=90)]) -> (%420:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=98)]) { - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=90), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=93), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=92))] (%415:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=90)]) -> (%416:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=93)]) - linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=93), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=94), )] (%416:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=93)]) -> (%417:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=94)]) - linalg.CPU.LinearOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=90), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=96), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=95))] (%415:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=90)]) -> (%418:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=96)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=94), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=96), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=94), )] (%417:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=94)], %418:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=96)]) -> (%419:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=94)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=94), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 
32767, quant_to_type: Int16, scale_type: Float32), uuid=98), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=97))] (%419:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=94)]) -> (%420:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=98)]) - cf.ReturnOp (%420:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=98)]) -> () + (%8110:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=91)]) -> (%8115:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=99)]) { + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=91), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=94), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=93))] (%8110:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=91)]) -> (%8111:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=94)]) + linalg.CPU.SiLUOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=94), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=95), )] (%8111:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=94)]) -> (%8112:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=95)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=91), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=97), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=96))] (%8110:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=91)]) -> (%8113:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=97)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=95), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=97), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=95), )] (%8112:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=95)], %8113:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=97)]) -> (%8114:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=95)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=95), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=99), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=98))] (%8114:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=95)]) -> (%8115:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=99)]) + cf.ReturnOp (%8115:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=99)]) -> () } } graph.SubGraphOp @model.layers.1 [using_qnn:true, symbol:model.layers.1] { - (%421:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=98)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %322:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=4)], %323:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=32)]) -> (%462:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=132)], %435:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=111)], %437:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=113)]) { - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=98), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=99), )] (%421:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=98)]) -> (%422:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=99)]) - graph.CallGraphOp @model.layers.1.self_attn (%422:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=99)], %379:tensor<[1, 32, 128], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %322:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=4)], %323:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=32)]) -> (%454:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=123)], %435:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=111)], %437:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=113)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=123), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=98), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=123), )] (%454:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=123)], %421:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=98)]) -> (%455:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=123)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=123), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=124), )] (%455:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=123)]) -> (%456:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=124)]) - graph.CallGraphOp @model.layers.1.mlp (%456:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=124)]) -> (%461:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=132)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=132), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=123), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=132), )] (%461:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=132)], %455:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 
-32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=123)]) -> (%462:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=132)]) - cf.ReturnOp (%462:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=132)], %435:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=111)], %437:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=113)]) -> () + (%8116:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=99)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8017:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=4)], %8018:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=32)]) -> (%8157:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=133)], %8130:tensor<[1, 8, 128, 32], 
Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=112)], %8132:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=114)]) { + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=99), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=100), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=101))] (%8116:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=99)]) -> (%8117:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=100)]) + graph.CallGraphOp @model.layers.1.self_attn (%8117:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=100)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8017:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=4)], %8018:tensor<[1, 8, 992, 128], 
Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=32)]) -> (%8149:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=124)], %8130:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=112)], %8132:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=114)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=124), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=99), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=124), )] (%8149:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=124)], %8116:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=99)]) -> (%8150:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=124)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=124), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=125), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=126))] (%8150:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=124)]) -> (%8151:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=125)]) + graph.CallGraphOp @model.layers.1.mlp (%8151:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=125)]) -> (%8156:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=133)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=133), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=124), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=133), )] (%8156:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=133)], %8150:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=124)]) -> (%8157:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=133)]) + cf.ReturnOp (%8157:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=133)], 
%8130:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=112)], %8132:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=114)]) -> () } } graph.SubGraphOp @model.layers.1.self_attn [using_qnn:true, symbol:model.layers.1.self_attn] { - (%422:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=99)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %322:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=4)], %323:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=32)]) -> (%454:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=123)], %435:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=111)], %437:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=113)]) { - linalg.CPU.LinearOp (%422:tensor<[1, 32, 
2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=99)]) -> (%423:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=105)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=99), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=102), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=101))] (%422:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=99)]) -> (%424:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=102)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=99), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=104), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=103))] (%422:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=99)]) -> (%425:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=104)]) - linalg.CPU.ViewOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=105), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=105), )] (%423:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=105)]) -> (%423:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=105)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=105), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=105), )] (%423:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=105)]) -> (%426:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=105)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=102), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=102), )] (%424:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=102)]) -> (%424:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=102)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=102), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=102), )] (%424:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=102)]) -> (%427:tensor<[1, 8, 32, 128], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=102)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=104), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=104), )] (%425:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=104)]) -> (%425:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=104)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=104), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=104), )] (%425:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=104)]) -> (%428:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=104)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=105), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=106), )] (%426:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=105)]) -> (%429:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=106)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=102), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=108), )] (%427:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=102)]) -> (%430:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=108)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=106), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=106), )] (%429:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=106)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%431:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=106)]) - linalg.CPU.RoPEOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=108), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=108), )] (%430:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=108)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%432:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=108)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=108), outputs_0:QuantSpec(Raw(type: Float16), uuid=110), )] (%432:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=108)]) -> (%433:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=110)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=110), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=111), )] (%433:tensor<[1, 
8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=110)]) -> (%434:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=111)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=111), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=111), )] (%434:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=111)]) -> (%435:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=111)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=104), outputs_0:QuantSpec(Raw(type: Float16), uuid=112), )] (%428:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=104)]) -> (%436:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=112)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=112), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=113), )] (%436:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=112)]) -> (%437:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=113)]) - linalg.CPU.ConcatOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=4), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=111), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=4), )] (%322:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=4)], %435:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=111)]) -> (%438:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=4)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=32), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=113), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=32), )] (%323:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=32)], %437:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=113)]) -> (%439:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=32)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: 
Float32), uuid=4), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=4), )] (%438:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=4)]) -> (%440:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=4)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=32), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=32), )] (%439:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=32)]) -> (%441:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=32)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=106), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=4), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=114), )] (%431:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=106)], %440:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=4)]) -> (%442:tensor<[1, 16, 32, 1024], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=114)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=114), inputs_1:QuantSpec(Raw(type: Float32), uuid=115), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=114), )] (%442:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=114)], %443:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=115), constant:[0.088388346]]) -> (%444:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=114)]) - linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=114), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=116), )] (%444:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=114)]) -> (%445:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=116)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=116), inputs_1:QuantSpec(Raw(type: Int16), uuid=117), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=116), )] (%445:tensor<[1, 
16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=116)], %446:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=117), constant:[-20]]) -> (%447:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=116)]) - linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=118), outputs_0:QuantSpec(Raw(type: UInt8), uuid=119), )] (%319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %448:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=118), constant:[0]]) -> (%449:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=119)]) - linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=119), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=114), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=116), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=116), )] (%449:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=119)], %444:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=114)], %447:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=116)]) -> (%450:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=116)]) - linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=116), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=120), )] (%450:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=116)]) -> (%451:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=120)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=120), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=32), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=121), )] (%451:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=120)], %441:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=32)]) -> (%452:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=121)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=121), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=121), )] 
(%452:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=121)]) -> (%453:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=121)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=121), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=121), )] (%453:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=121)]) -> (%453:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=121)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=121), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=123), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=122))] (%453:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=121)]) -> (%454:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=123)]) - cf.ReturnOp (%454:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=123)], %435:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=111)], %437:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=113)]) -> () + (%8117:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=100)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8017:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=4)], %8018:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=32)]) -> (%8149:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=124)], %8130:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=112)], %8132:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=114)]) { + linalg.CPU.LinearOp (%8117:tensor<[1, 32, 2048], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=100)]) -> (%8118:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=106)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=100), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=103), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=102))] (%8117:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=100)]) -> (%8119:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=103)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=100), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=105), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=104))] (%8117:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=100)]) -> (%8120:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=105)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: 
Int16PerTensor), uuid=106), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=106), )] (%8118:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=106)]) -> (%8118:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=106)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=106), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=106), )] (%8118:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=106)]) -> (%8121:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=106)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=103), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=103), )] (%8119:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=103)]) -> (%8119:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=103)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=103), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=103), )] (%8119:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=103)]) -> (%8122:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 
32767, quant_to_type: Int16, scale_type: Float32), uuid=103)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=105), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=105), )] (%8120:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=105)]) -> (%8120:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=105)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=105), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=105), )] (%8120:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=105)]) -> (%8123:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=105)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=106), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=107), weight_weight:QuantSpec(Raw(type: Int16PerTensor), uuid=108))] (%8121:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=106)]) -> (%8124:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=107)]) + 
linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=103), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=109), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=110))] (%8122:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=103)]) -> (%8125:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=109)]) + linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=107), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=107), )] (%8124:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=107)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%8126:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 
32767, quant_to_type: Int16, scale_type: Float32), uuid=107)]) + linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=109), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=109), )] (%8125:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=109)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%8127:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=109)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=109), outputs_0:QuantSpec(Raw(type: Float16), uuid=111), )] (%8127:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=109)]) -> (%8128:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=111)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=111), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, 
quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=112), )] (%8128:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=111)]) -> (%8129:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=112)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=112), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=112), )] (%8129:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=112)]) -> (%8130:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=112)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=105), outputs_0:QuantSpec(Raw(type: Float16), uuid=113), )] (%8123:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=105)]) -> (%8131:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=113)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=113), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=114), )] (%8131:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=113)]) -> (%8132:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: 
Float32), uuid=114)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=4), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=112), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=4), )] (%8017:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=4)], %8130:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=112)]) -> (%8133:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=4)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=32), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=114), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=32), )] (%8018:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=32)], %8132:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=114)]) -> (%8134:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=32)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, 
quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=4), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=4), )] (%8133:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=4)]) -> (%8135:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=4)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=32), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=32), )] (%8134:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=32)]) -> (%8136:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=32)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=107), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=4), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=115), )] (%8126:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=107)], %8135:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=4)]) -> (%8137:tensor<[1, 16, 32, 
1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=115)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=115), inputs_1:QuantSpec(Raw(type: Float32), uuid=116), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=115), )] (%8137:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=115)], %8138:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=116), constant:[0.088388346]]) -> (%8139:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=115)]) + linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=115), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=117), )] (%8139:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=115)]) -> (%8140:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=117)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=117), inputs_1:QuantSpec(Raw(type: Int16), uuid=118), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=117), )] (%8140:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=117)], %8141:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=118), constant:[-20]]) -> (%8142:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=117)]) + linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=119), outputs_0:QuantSpec(Raw(type: UInt8), uuid=120), )] (%8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8143:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=119), constant:[0]]) -> (%8144:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=120)]) + linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=120), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=115), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=117), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=117), )] (%8144:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=120)], %8139:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=115)], %8142:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=117)]) -> (%8145:tensor<[1, 16, 32, 1024], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=117)]) + linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=117), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=121), )] (%8145:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=117)]) -> (%8146:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=121)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=121), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=32), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=122), )] (%8146:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=121)], %8136:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=32)]) -> (%8147:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=122)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=122), outputs_0:QuantSpec(SymPerTensor(quant_min: 
-32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=122), )] (%8147:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=122)]) -> (%8148:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=122)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=122), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=122), )] (%8148:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=122)]) -> (%8148:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=122)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=122), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=124), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=123))] (%8148:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=122)]) -> (%8149:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=124)]) + cf.ReturnOp (%8149:tensor<[1, 32, 2048], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=124)], %8130:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=112)], %8132:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=114)]) -> () } } graph.SubGraphOp @model.layers.1.mlp [using_qnn:true, symbol:model.layers.1.mlp] { - (%456:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=124)]) -> (%461:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=132)]) { - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=124), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=127), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=126))] (%456:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=124)]) -> (%457:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=127)]) - linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=127), 
outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=128), )] (%457:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=127)]) -> (%458:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=128)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=124), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=130), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=129))] (%456:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=124)]) -> (%459:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=130)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=128), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=130), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=128), )] (%458:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=128)], %459:tensor<[1, 32, 6144], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=130)]) -> (%460:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=128)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=128), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=132), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=131))] (%460:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=128)]) -> (%461:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=132)]) - cf.ReturnOp (%461:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=132)]) -> () + (%8151:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=125)]) -> (%8156:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=133)]) { + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=125), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=128), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=127))] (%8151:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=125)]) -> (%8152:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=128)]) + linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=128), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=129), )] (%8152:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=128)]) -> (%8153:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=129)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=125), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=131), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=130))] (%8151:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=125)]) -> (%8154:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: 
Int16, scale_type: Float32), uuid=131)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=129), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=131), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=129), )] (%8153:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=129)], %8154:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=131)]) -> (%8155:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=129)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=129), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=133), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=132))] (%8155:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=129)]) -> (%8156:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=133)]) + cf.ReturnOp (%8156:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, 
scale_type: Float32), uuid=133)]) -> () } } graph.SubGraphOp @model.layers.2 [using_qnn:true, symbol:model.layers.2] { - (%462:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=132)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %324:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=5)], %325:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=33)]) -> (%503:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=166)], %476:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=145)], %478:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=147)]) { - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=132), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=133), )] (%462:tensor<[1, 32, 2048], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=132)]) -> (%463:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=133)]) - graph.CallGraphOp @model.layers.2.self_attn (%463:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=133)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %324:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=5)], %325:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=33)]) -> (%495:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=157)], %476:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=145)], %478:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=147)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, 
scale_type: Float32), uuid=157), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=132), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=157), )] (%495:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=157)], %462:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=132)]) -> (%496:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=157)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=157), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=158), )] (%496:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=157)]) -> (%497:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=158)]) - graph.CallGraphOp @model.layers.2.mlp (%497:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=158)]) -> (%502:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=166)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 
32767, quant_to_type: Int16, scale_type: Float32), uuid=166), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=157), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=166), )] (%502:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=166)], %496:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=157)]) -> (%503:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=166)]) - cf.ReturnOp (%503:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=166)], %476:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=145)], %478:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=147)]) -> () + (%8157:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=133)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, 
CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8019:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=5)], %8020:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=33)]) -> (%8198:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=167)], %8171:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=146)], %8173:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=148)]) { + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=133), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=134), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=135))] (%8157:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=133)]) -> (%8158:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=134)]) + graph.CallGraphOp @model.layers.2.self_attn (%8158:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=134)], %8074:tensor<[1, 32, 128], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8019:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=5)], %8020:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=33)]) -> (%8190:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=158)], %8171:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=146)], %8173:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=148)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=158), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=133), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=158), )] (%8190:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=158)], %8157:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=133)]) -> (%8191:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=158)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=158), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=159), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=160))] (%8191:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=158)]) -> (%8192:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=159)]) + graph.CallGraphOp @model.layers.2.mlp (%8192:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=159)]) -> (%8197:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=167)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=167), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=158), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=167), )] (%8197:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=167)], %8191:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=158)]) -> (%8198:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=167)]) + cf.ReturnOp (%8198:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=167)], %8171:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=146)], %8173:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=148)]) -> () } } graph.SubGraphOp @model.layers.2.self_attn [using_qnn:true, symbol:model.layers.2.self_attn] { - (%463:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=133)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %324:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=5)], %325:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, 
scale_type: Float32), uuid=33)]) -> (%495:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=157)], %476:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=145)], %478:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=147)]) { - linalg.CPU.LinearOp (%463:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=133)]) -> (%464:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=139)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=133), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=136), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=135))] (%463:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=133)]) -> (%465:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=136)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=133), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=138), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=137))] (%463:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=133)]) -> (%466:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=138)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=139), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=139), )] (%464:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=139)]) -> (%464:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=139)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=139), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=139), )] (%464:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=139)]) -> (%467:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=139)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=136), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=136), )] (%465:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=136)]) -> (%465:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=136)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=136), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=136), )] (%465:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=136)]) -> (%468:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=136)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=138), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=138), )] (%466:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=138)]) -> (%466:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=138)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=138), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=138), )] (%466:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=138)]) -> (%469:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: 
Float32), uuid=138)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=139), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=140), )] (%467:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=139)]) -> (%470:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=140)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=136), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=142), )] (%468:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=136)]) -> (%471:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=142)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=140), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=140), )] (%470:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=140)], %379:tensor<[1, 32, 128], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%472:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=140)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=142), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=142), )] (%471:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=142)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%473:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=142)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=142), outputs_0:QuantSpec(Raw(type: Float16), uuid=144), )] 
(%473:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=142)]) -> (%474:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=144)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=144), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=145), )] (%474:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=144)]) -> (%475:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=145)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=145), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=145), )] (%475:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=145)]) -> (%476:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=145)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=138), outputs_0:QuantSpec(Raw(type: Float16), uuid=146), )] (%469:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=138)]) -> (%477:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=146)]) - linalg.CPU.CastTypeOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=146), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=147), )] (%477:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=146)]) -> (%478:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=147)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=5), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=145), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=5), )] (%324:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=5)], %476:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=145)]) -> (%479:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=5)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=33), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=147), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=33), )] (%325:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=33)], 
%478:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=147)]) -> (%480:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=33)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=5), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=5), )] (%479:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=5)]) -> (%481:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=5)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=33), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=33), )] (%480:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=33)]) -> (%482:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=33)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=140), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=5), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 
32767, quant_to_type: Int16, scale_type: Float32), uuid=148), )] (%472:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=140)], %481:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=5)]) -> (%483:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=148)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=148), inputs_1:QuantSpec(Raw(type: Float32), uuid=149), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=148), )] (%483:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=148)], %484:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=149), constant:[0.088388346]]) -> (%485:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=148)]) - linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=148), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=150), )] (%485:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=148)]) -> (%486:tensor<[1, 16, 32, 1], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=150)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=150), inputs_1:QuantSpec(Raw(type: Int16), uuid=151), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=150), )] (%486:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=150)], %487:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=151), constant:[-20]]) -> (%488:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=150)]) - linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=152), outputs_0:QuantSpec(Raw(type: UInt8), uuid=153), )] (%319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %489:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=152), constant:[0]]) -> (%490:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=153)]) - linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=153), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=148), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=150), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=150), )] (%490:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), 
uuid=153)], %485:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=148)], %488:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=150)]) -> (%491:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=150)]) - linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=150), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=154), )] (%491:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=150)]) -> (%492:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=154)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=154), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=33), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=155), )] (%492:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=154)], %482:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), 
uuid=33)]) -> (%493:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=155)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=155), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=155), )] (%493:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=155)]) -> (%494:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=155)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=155), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=155), )] (%494:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=155)]) -> (%494:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=155)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=155), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=157), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), 
uuid=156))] (%494:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=155)]) -> (%495:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=157)]) - cf.ReturnOp (%495:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=157)], %476:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=145)], %478:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=147)]) -> () + (%8158:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=134)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8019:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=5)], %8020:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=33)]) -> (%8190:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=158)], %8171:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=146)], %8173:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=148)]) { + linalg.CPU.LinearOp (%8158:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=134)]) -> (%8159:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=140)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=134), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=137), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=136))] (%8158:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=134)]) -> (%8160:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=137)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=134), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=139), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, 
quant_to_type: UInt4, scale_1_type: Float32), uuid=138))] (%8158:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=134)]) -> (%8161:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=139)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=140), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=140), )] (%8159:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=140)]) -> (%8159:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=140)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=140), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=140), )] (%8159:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=140)]) -> (%8162:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=140)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=137), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=137), )] (%8160:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=137)]) -> (%8160:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=137)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=137), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=137), )] (%8160:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=137)]) -> (%8163:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=137)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=139), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=139), )] (%8161:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=139)]) -> (%8161:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=139)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=139), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=139), )] (%8161:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=139)]) -> (%8164:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=139)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: 
Int16PerTensor), uuid=140), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=141), weight_weight:QuantSpec(Raw(type: Int16PerTensor), uuid=142))] (%8162:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=140)]) -> (%8165:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=141)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=137), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=143), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=144))] (%8163:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=137)]) -> (%8166:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=143)]) + linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=141), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=141), )] (%8165:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=141)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%8167:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=141)]) + linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=143), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=143), )] (%8166:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=143)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%8168:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=143)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: 
Int16, scale_type: Float32), uuid=143), outputs_0:QuantSpec(Raw(type: Float16), uuid=145), )] (%8168:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=143)]) -> (%8169:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=145)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=145), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=146), )] (%8169:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=145)]) -> (%8170:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=146)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=146), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=146), )] (%8170:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=146)]) -> (%8171:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=146)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=139), outputs_0:QuantSpec(Raw(type: Float16), uuid=147), )] (%8164:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=139)]) -> (%8172:tensor<[1, 8, 32, 128], Float16, 
CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=147)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=147), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=148), )] (%8172:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=147)]) -> (%8173:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=148)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=5), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=146), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=5), )] (%8019:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=5)], %8171:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=146)]) -> (%8174:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=5)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=33), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=148), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=33), )] (%8020:tensor<[1, 8, 992, 128], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=33)], %8173:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=148)]) -> (%8175:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=33)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=5), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=5), )] (%8174:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=5)]) -> (%8176:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=5)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=33), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=33), )] (%8175:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=33)]) -> (%8177:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=33)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=141), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, 
quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=5), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=149), )] (%8167:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=141)], %8176:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=5)]) -> (%8178:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=149)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=149), inputs_1:QuantSpec(Raw(type: Float32), uuid=150), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=149), )] (%8178:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=149)], %8179:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=150), constant:[0.088388346]]) -> (%8180:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=149)]) + linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=149), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=151), )] (%8180:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 
32767, quant_to_type: Int16, scale_type: Float32), uuid=149)]) -> (%8181:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=151)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=151), inputs_1:QuantSpec(Raw(type: Int16), uuid=152), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=151), )] (%8181:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=151)], %8182:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=152), constant:[-20]]) -> (%8183:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=151)]) + linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=153), outputs_0:QuantSpec(Raw(type: UInt8), uuid=154), )] (%8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8184:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=153), constant:[0]]) -> (%8185:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=154)]) + linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=154), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=149), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=151), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, 
scale_type: Float32), uuid=151), )] (%8185:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=154)], %8180:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=149)], %8183:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=151)]) -> (%8186:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=151)]) + linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=151), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=155), )] (%8186:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=151)]) -> (%8187:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=155)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=155), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=33), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=156), )] (%8187:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=155)], %8177:tensor<[1, 16, 1024, 128], 
Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=33)]) -> (%8188:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=156)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=156), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=156), )] (%8188:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=156)]) -> (%8189:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=156)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=156), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=156), )] (%8189:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=156)]) -> (%8189:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=156)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=156), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=158), 
weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=157))] (%8189:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=156)]) -> (%8190:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=158)]) + cf.ReturnOp (%8190:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=158)], %8171:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=146)], %8173:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=148)]) -> () } } graph.SubGraphOp @model.layers.2.mlp [using_qnn:true, symbol:model.layers.2.mlp] { - (%497:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=158)]) -> (%502:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=166)]) { - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=158), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=161), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), 
uuid=160))] (%497:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=158)]) -> (%498:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=161)]) - linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=161), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=162), )] (%498:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=161)]) -> (%499:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=162)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=158), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=164), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=163))] (%497:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=158)]) -> (%500:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=164)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: 
Int16, scale_type: Float32), uuid=162), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=164), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=162), )] (%499:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=162)], %500:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=164)]) -> (%501:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=162)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=162), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=166), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=165))] (%501:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=162)]) -> (%502:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=166)]) - cf.ReturnOp (%502:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=166)]) -> () + (%8192:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: 
Int16, scale_type: Float32), uuid=159)]) -> (%8197:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=167)]) { + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=159), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=162), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=161))] (%8192:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=159)]) -> (%8193:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=162)]) + linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=162), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=163), )] (%8193:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=162)]) -> (%8194:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=163)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=159), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, 
scale_type: Float32), uuid=165), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=164))] (%8192:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=159)]) -> (%8195:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=165)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=163), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=165), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=163), )] (%8194:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=163)], %8195:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=165)]) -> (%8196:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=163)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=163), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=167), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), 
uuid=166))] (%8196:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=163)]) -> (%8197:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=167)]) + cf.ReturnOp (%8197:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=167)]) -> () } } graph.SubGraphOp @model.layers.3 [using_qnn:true, symbol:model.layers.3] { - (%503:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=166)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %326:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=6)], %327:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=34)]) -> (%544:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=200)], %517:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=179)], %519:tensor<[1, 8, 32, 
128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=181)]) { - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=166), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=167), )] (%503:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=166)]) -> (%504:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=167)]) - graph.CallGraphOp @model.layers.3.self_attn (%504:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=167)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %326:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=6)], %327:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=34)]) -> (%536:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=191)], %517:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=179)], %519:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=181)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=191), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=166), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=191), )] (%536:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=191)], %503:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=166)]) -> (%537:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=191)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=191), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=192), )] (%537:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=191)]) -> (%538:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=192)]) - 
graph.CallGraphOp @model.layers.3.mlp (%538:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=192)]) -> (%543:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=200)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=200), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=191), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=200), )] (%543:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=200)], %537:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=191)]) -> (%544:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=200)]) - cf.ReturnOp (%544:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=200)], %517:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=179)], %519:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=181)]) -> () + (%8198:tensor<[1, 32, 2048], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=167)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8021:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=6)], %8022:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=34)]) -> (%8239:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=201)], %8212:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=180)], %8214:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=182)]) { + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=167), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=168), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=169))] (%8198:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=167)]) -> (%8199:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=168)]) + graph.CallGraphOp @model.layers.3.self_attn (%8199:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=168)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8021:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=6)], %8022:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=34)]) -> (%8231:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=192)], %8212:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=180)], %8214:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=182)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=192), 
inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=167), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=192), )] (%8231:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=192)], %8198:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=167)]) -> (%8232:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=192)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=192), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=193), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=194))] (%8232:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=192)]) -> (%8233:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=193)]) + graph.CallGraphOp @model.layers.3.mlp (%8233:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=193)]) -> (%8238:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=201)]) + 
linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=201), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=192), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=201), )] (%8238:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=201)], %8232:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=192)]) -> (%8239:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=201)]) + cf.ReturnOp (%8239:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=201)], %8212:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=180)], %8214:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=182)]) -> () } } graph.SubGraphOp @model.layers.3.self_attn [using_qnn:true, symbol:model.layers.3.self_attn] { - (%504:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=167)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %326:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=6)], %327:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=34)]) -> (%536:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=191)], %517:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=179)], %519:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=181)]) { - linalg.CPU.LinearOp (%504:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=167)]) -> (%505:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=173)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=167), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=170), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=169))] (%504:tensor<[1, 32, 2048], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=167)]) -> (%506:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=170)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=167), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=172), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=171))] (%504:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=167)]) -> (%507:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=172)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=173), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=173), )] (%505:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=173)]) -> (%505:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=173)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=173), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=173), )] (%505:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=173)]) -> (%508:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=173)]) - linalg.CPU.ViewOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=170), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=170), )] (%506:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=170)]) -> (%506:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=170)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=170), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=170), )] (%506:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=170)]) -> (%509:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=170)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=172), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=172), )] (%507:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=172)]) -> (%507:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=172)]) - 
linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=172), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=172), )] (%507:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=172)]) -> (%510:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=172)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=173), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=174), )] (%508:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=173)]) -> (%511:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=174)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=170), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=176), )] (%509:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=170)]) -> (%512:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=176)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=174), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=174), )] (%511:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=174)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%513:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=174)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=176), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=176), )] (%512:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=176)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%514:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=176)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=176), outputs_0:QuantSpec(Raw(type: Float16), uuid=178), )] (%514:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=176)]) -> (%515:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=178)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=178), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=179), )] (%515:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=178)]) -> (%516:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=179)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=179), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=179), )] (%516:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=179)]) -> (%517:tensor<[1, 8, 128, 32], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=179)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=172), outputs_0:QuantSpec(Raw(type: Float16), uuid=180), )] (%510:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=172)]) -> (%518:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=180)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=180), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=181), )] (%518:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=180)]) -> (%519:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=181)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=6), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=179), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=6), )] (%326:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=6)], %517:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=179)]) -> (%520:tensor<[1, 8, 128, 1024], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=6)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=34), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=181), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=34), )] (%327:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=34)], %519:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=181)]) -> (%521:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=34)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=6), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=6), )] (%520:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=6)]) -> (%522:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=6)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=34), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=34), )] 
(%521:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=34)]) -> (%523:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=34)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=174), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=6), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=182), )] (%513:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=174)], %522:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=6)]) -> (%524:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=182)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=182), inputs_1:QuantSpec(Raw(type: Float32), uuid=183), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=182), )] (%524:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=182)], %525:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=183), constant:[0.088388346]]) -> (%526:tensor<[1, 16, 32, 1024], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=182)]) - linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=182), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=184), )] (%526:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=182)]) -> (%527:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=184)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=184), inputs_1:QuantSpec(Raw(type: Int16), uuid=185), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=184), )] (%527:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=184)], %528:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=185), constant:[-20]]) -> (%529:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=184)]) - linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=186), outputs_0:QuantSpec(Raw(type: UInt8), uuid=187), )] (%319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %530:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), 
uuid=186), constant:[0]]) -> (%531:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=187)]) - linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=187), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=182), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=184), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=184), )] (%531:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=187)], %526:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=182)], %529:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=184)]) -> (%532:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=184)]) - linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=184), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=188), )] (%532:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=184)]) -> (%533:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=188)]) - linalg.CPU.MatMulOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=188), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=34), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=189), )] (%533:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=188)], %523:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=34)]) -> (%534:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=189)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=189), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=189), )] (%534:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=189)]) -> (%535:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=189)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=189), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=189), )] (%535:tensor<[1, 32, 16, 128], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=189)]) -> (%535:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=189)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=189), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=191), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=190))] (%535:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=189)]) -> (%536:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=191)]) - cf.ReturnOp (%536:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=191)], %517:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=179)], %519:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=181)]) -> () + (%8199:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=168)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8021:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=6)], %8022:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=34)]) -> (%8231:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=192)], %8212:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=180)], %8214:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=182)]) { + linalg.CPU.LinearOp (%8199:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=168)]) -> (%8200:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=174)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=168), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=171), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=170))] 
(%8199:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=168)]) -> (%8201:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=171)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=168), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=173), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=172))] (%8199:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=168)]) -> (%8202:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=173)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=174), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=174), )] (%8200:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=174)]) -> (%8200:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=174)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=174), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=174), )] (%8200:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=174)]) -> (%8203:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=174)]) 
+ linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=171), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=171), )] (%8201:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=171)]) -> (%8201:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=171)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=171), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=171), )] (%8201:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=171)]) -> (%8204:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=171)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=173), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=173), )] (%8202:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=173)]) -> (%8202:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=173)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=173), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=173), )] (%8202:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=173)]) -> (%8205:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=173)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=174), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=175), weight_weight:QuantSpec(Raw(type: Int16PerTensor), uuid=176))] (%8203:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=174)]) -> (%8206:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=175)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=171), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=177), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=178))] (%8204:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=171)]) -> (%8207:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 
-32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=177)]) + linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=175), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=175), )] (%8206:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=175)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%8208:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=175)]) + linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=177), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=177), )] (%8207:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 
-32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=177)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%8209:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=177)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=177), outputs_0:QuantSpec(Raw(type: Float16), uuid=179), )] (%8209:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=177)]) -> (%8210:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=179)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=179), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=180), )] (%8210:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=179)]) -> (%8211:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=180)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=180), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=180), )] (%8211:tensor<[1, 8, 32, 128], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=180)]) -> (%8212:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=180)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=173), outputs_0:QuantSpec(Raw(type: Float16), uuid=181), )] (%8205:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=173)]) -> (%8213:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=181)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=181), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=182), )] (%8213:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=181)]) -> (%8214:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=182)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=6), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=180), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=6), )] (%8021:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=6)], %8212:tensor<[1, 8, 128, 32], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=180)]) -> (%8215:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=6)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=34), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=182), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=34), )] (%8022:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=34)], %8214:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=182)]) -> (%8216:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=34)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=6), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=6), )] (%8215:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=6)]) -> (%8217:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=6)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, 
quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=34), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=34), )] (%8216:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=34)]) -> (%8218:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=34)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=175), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=6), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=183), )] (%8208:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=175)], %8217:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=6)]) -> (%8219:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=183)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=183), inputs_1:QuantSpec(Raw(type: Float32), uuid=184), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=183), )] (%8219:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=183)], %8220:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=184), constant:[0.088388346]]) -> (%8221:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=183)]) + linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=183), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=185), )] (%8221:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=183)]) -> (%8222:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=185)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=185), inputs_1:QuantSpec(Raw(type: Int16), uuid=186), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=185), )] (%8222:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=185)], %8223:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=186), constant:[-20]]) -> (%8224:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=185)]) + linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=187), 
outputs_0:QuantSpec(Raw(type: UInt8), uuid=188), )] (%8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8225:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=187), constant:[0]]) -> (%8226:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=188)]) + linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=188), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=183), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=185), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=185), )] (%8226:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=188)], %8221:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=183)], %8224:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=185)]) -> (%8227:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=185)]) + linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=185), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=189), )] (%8227:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=185)]) -> (%8228:tensor<[1, 16, 32, 1024], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=189)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=189), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=34), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=190), )] (%8228:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=189)], %8218:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=34)]) -> (%8229:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=190)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=190), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=190), )] (%8229:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=190)]) -> (%8230:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=190)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=190), 
outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=190), )] (%8230:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=190)]) -> (%8230:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=190)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=190), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=192), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=191))] (%8230:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=190)]) -> (%8231:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=192)]) + cf.ReturnOp (%8231:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=192)], %8212:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=180)], %8214:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=182)]) -> () } } graph.SubGraphOp @model.layers.3.mlp [using_qnn:true, symbol:model.layers.3.mlp] { - (%538:tensor<[1, 32, 2048], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=192)]) -> (%543:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=200)]) { - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=192), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=195), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=194))] (%538:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=192)]) -> (%539:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=195)]) - linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=195), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=196), )] (%539:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=195)]) -> (%540:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=196)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=192), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=198), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=197))] (%538:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=192)]) -> (%541:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=198)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=196), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=198), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=196), )] (%540:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=196)], %541:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=198)]) -> (%542:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=196)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=196), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=200), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 
32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=199))] (%542:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=196)]) -> (%543:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=200)]) - cf.ReturnOp (%543:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=200)]) -> () + (%8233:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=193)]) -> (%8238:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=201)]) { + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=193), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=196), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=195))] (%8233:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=193)]) -> (%8234:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=196)]) + linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, 
scale_type: Float32), uuid=196), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=197), )] (%8234:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=196)]) -> (%8235:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=197)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=193), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=199), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=198))] (%8233:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=193)]) -> (%8236:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=199)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=197), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=199), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=197), )] (%8235:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=197)], %8236:tensor<[1, 32, 6144], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=199)]) -> (%8237:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=197)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=197), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=201), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=200))] (%8237:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=197)]) -> (%8238:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=201)]) + cf.ReturnOp (%8238:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=201)]) -> () } } graph.SubGraphOp @model.layers.4 [using_qnn:true, symbol:model.layers.4] { - (%544:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=200)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 
1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %328:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=7)], %329:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=35)]) -> (%585:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=234)], %558:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=213)], %560:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=215)]) { - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=200), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=201), )] (%544:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=200)]) -> (%545:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=201)]) - graph.CallGraphOp @model.layers.4.self_attn (%545:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=201)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %328:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=7)], %329:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=35)]) -> (%577:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=225)], %558:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=213)], %560:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=215)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=225), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=200), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=225), )] (%577:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=225)], %544:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=200)]) -> (%578:tensor<[1, 32, 2048], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=225)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=225), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=226), )] (%578:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=225)]) -> (%579:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=226)]) - graph.CallGraphOp @model.layers.4.mlp (%579:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=226)]) -> (%584:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=234)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=234), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=225), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=234), )] (%584:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=234)], %578:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=225)]) -> (%585:tensor<[1, 
32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=234)]) - cf.ReturnOp (%585:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=234)], %558:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=213)], %560:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=215)]) -> () + (%8239:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=201)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8023:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=7)], %8024:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=35)]) -> (%8280:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=235)], %8253:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: 
Int8, scale_type: Float32), uuid=214)], %8255:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=216)]) { + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=201), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=202), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=203))] (%8239:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=201)]) -> (%8240:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=202)]) + graph.CallGraphOp @model.layers.4.self_attn (%8240:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=202)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8023:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=7)], %8024:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, 
scale_type: Float32), uuid=35)]) -> (%8272:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=226)], %8253:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=214)], %8255:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=216)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=226), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=201), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=226), )] (%8272:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=226)], %8239:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=201)]) -> (%8273:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=226)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=226), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=227), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=228))] (%8273:tensor<[1, 32, 2048], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=226)]) -> (%8274:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=227)]) + graph.CallGraphOp @model.layers.4.mlp (%8274:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=227)]) -> (%8279:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=235)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=235), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=226), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=235), )] (%8279:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=235)], %8273:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=226)]) -> (%8280:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=235)]) + cf.ReturnOp (%8280:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=235)], %8253:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, 
quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=214)], %8255:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=216)]) -> () } } graph.SubGraphOp @model.layers.4.self_attn [using_qnn:true, symbol:model.layers.4.self_attn] { - (%545:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=201)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %328:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=7)], %329:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=35)]) -> (%577:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=225)], %558:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=213)], %560:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=215)]) { - linalg.CPU.LinearOp (%545:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=201)]) -> (%546:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=207)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=201), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=204), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=203))] (%545:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=201)]) -> (%547:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=204)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=201), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=206), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=205))] (%545:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=201)]) -> (%548:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=206)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=207), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=207), 
)] (%546:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=207)]) -> (%546:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=207)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=207), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=207), )] (%546:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=207)]) -> (%549:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=207)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=204), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=204), )] (%547:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=204)]) -> (%547:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=204)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=204), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=204), )] (%547:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=204)]) -> (%550:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=204)]) - linalg.CPU.ViewOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=206), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=206), )] (%548:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=206)]) -> (%548:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=206)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=206), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=206), )] (%548:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=206)]) -> (%551:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=206)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=207), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=208), )] (%549:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=207)]) -> (%552:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=208)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, 
scale_type: Float32), uuid=204), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=210), )] (%550:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=204)]) -> (%553:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=210)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=208), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=208), )] (%552:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=208)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%554:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=208)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=210), 
inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=210), )] (%553:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=210)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%555:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=210)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=210), outputs_0:QuantSpec(Raw(type: Float16), uuid=212), )] (%555:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=210)]) -> (%556:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=212)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=212), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=213), )] (%556:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=212)]) -> (%557:tensor<[1, 8, 32, 128], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=213)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=213), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=213), )] (%557:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=213)]) -> (%558:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=213)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=206), outputs_0:QuantSpec(Raw(type: Float16), uuid=214), )] (%551:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=206)]) -> (%559:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=214)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=214), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=215), )] (%559:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=214)]) -> (%560:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=215)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=7), 
inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=213), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=7), )] (%328:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=7)], %558:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=213)]) -> (%561:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=7)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=35), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=215), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=35), )] (%329:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=35)], %560:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=215)]) -> (%562:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=35)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=7), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=7), )] 
(%561:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=7)]) -> (%563:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=7)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=35), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=35), )] (%562:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=35)]) -> (%564:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=35)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=208), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=7), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=216), )] (%554:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=208)], %563:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=7)]) -> (%565:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=216)]) - linalg.CPU.MulOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=216), inputs_1:QuantSpec(Raw(type: Float32), uuid=217), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=216), )] (%565:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=216)], %566:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=217), constant:[0.088388346]]) -> (%567:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=216)]) - linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=216), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=218), )] (%567:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=216)]) -> (%568:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=218)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=218), inputs_1:QuantSpec(Raw(type: Int16), uuid=219), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=218), )] (%568:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=218)], %569:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=219), constant:[-20]]) -> (%570:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=218)]) - linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=220), outputs_0:QuantSpec(Raw(type: UInt8), uuid=221), )] (%319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %571:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=220), constant:[0]]) -> (%572:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=221)]) - linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=221), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=216), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=218), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=218), )] (%572:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=221)], %567:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=216)], %570:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=218)]) -> (%573:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=218)]) - linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 
-32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=218), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=222), )] (%573:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=218)]) -> (%574:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=222)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=222), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=35), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=223), )] (%574:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=222)], %564:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=35)]) -> (%575:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=223)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=223), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=223), )] (%575:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, 
scale_type: Float32), uuid=223)]) -> (%576:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=223)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=223), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=223), )] (%576:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=223)]) -> (%576:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=223)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=223), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=225), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=224))] (%576:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=223)]) -> (%577:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=225)]) - cf.ReturnOp (%577:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=225)], %558:tensor<[1, 8, 128, 32], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=213)], %560:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=215)]) -> () + (%8240:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=202)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8023:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=7)], %8024:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=35)]) -> (%8272:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=226)], %8253:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=214)], %8255:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=216)]) { + linalg.CPU.LinearOp (%8240:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: 
Float32), uuid=202)]) -> (%8241:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=208)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=202), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=205), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=204))] (%8240:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=202)]) -> (%8242:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=205)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=202), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=207), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=206))] (%8240:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=202)]) -> (%8243:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=207)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=208), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=208), )] (%8241:tensor<[1, 32, 
2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=208)]) -> (%8241:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=208)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=208), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=208), )] (%8241:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=208)]) -> (%8244:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=208)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=205), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=205), )] (%8242:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=205)]) -> (%8242:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=205)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=205), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=205), )] (%8242:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=205)]) -> (%8245:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=205)]) + linalg.CPU.ViewOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=207), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=207), )] (%8243:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=207)]) -> (%8243:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=207)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=207), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=207), )] (%8243:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=207)]) -> (%8246:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=207)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=208), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=209), weight_weight:QuantSpec(Raw(type: Int16PerTensor), uuid=210))] (%8244:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=208)]) -> (%8247:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=209)]) + linalg.CPU.RMSNormOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=205), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=211), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=212))] (%8245:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=205)]) -> (%8248:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=211)]) + linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=209), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=209), )] (%8247:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=209)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%8249:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: 
Int16, scale_type: Float32), uuid=209)]) + linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=211), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=211), )] (%8248:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=211)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%8250:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=211)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=211), outputs_0:QuantSpec(Raw(type: Float16), uuid=213), )] (%8250:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=211)]) -> (%8251:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=213)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=213), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, 
quant_to_type: Int8, scale_type: Float32), uuid=214), )] (%8251:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=213)]) -> (%8252:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=214)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=214), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=214), )] (%8252:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=214)]) -> (%8253:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=214)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=207), outputs_0:QuantSpec(Raw(type: Float16), uuid=215), )] (%8246:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=207)]) -> (%8254:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=215)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=215), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=216), )] (%8254:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=215)]) -> (%8255:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), 
uuid=216)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=7), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=214), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=7), )] (%8023:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=7)], %8253:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=214)]) -> (%8256:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=7)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=35), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=216), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=35), )] (%8024:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=35)], %8255:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=216)]) -> (%8257:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=35)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 
127, quant_to_type: Int8, scale_type: Float32), uuid=7), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=7), )] (%8256:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=7)]) -> (%8258:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=7)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=35), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=35), )] (%8257:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=35)]) -> (%8259:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=35)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=209), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=7), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=217), )] (%8249:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=209)], %8258:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=7)]) -> (%8260:tensor<[1, 16, 32, 1024], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=217)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=217), inputs_1:QuantSpec(Raw(type: Float32), uuid=218), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=217), )] (%8260:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=217)], %8261:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=218), constant:[0.088388346]]) -> (%8262:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=217)]) + linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=217), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=219), )] (%8262:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=217)]) -> (%8263:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=219)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=219), inputs_1:QuantSpec(Raw(type: Int16), uuid=220), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=219), 
)] (%8263:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=219)], %8264:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=220), constant:[-20]]) -> (%8265:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=219)]) + linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=221), outputs_0:QuantSpec(Raw(type: UInt8), uuid=222), )] (%8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8266:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=221), constant:[0]]) -> (%8267:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=222)]) + linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=222), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=217), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=219), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=219), )] (%8267:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=222)], %8262:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=217)], %8265:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=219)]) -> (%8268:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 
-32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=219)]) + linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=219), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=223), )] (%8268:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=219)]) -> (%8269:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=223)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=223), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=35), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=224), )] (%8269:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=223)], %8259:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=35)]) -> (%8270:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=224)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=224), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, 
scale_type: Float32), uuid=224), )] (%8270:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=224)]) -> (%8271:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=224)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=224), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=224), )] (%8271:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=224)]) -> (%8271:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=224)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=224), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=226), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=225))] (%8271:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=224)]) -> (%8272:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=226)]) + cf.ReturnOp (%8272:tensor<[1, 32, 2048], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=226)], %8253:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=214)], %8255:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=216)]) -> () } } graph.SubGraphOp @model.layers.4.mlp [using_qnn:true, symbol:model.layers.4.mlp] { - (%579:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=226)]) -> (%584:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=234)]) { - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=226), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=229), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=228))] (%579:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=226)]) -> (%580:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=229)]) - linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=229), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=230), )] (%580:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=229)]) -> (%581:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=230)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=226), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=232), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=231))] (%579:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=226)]) -> (%582:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=232)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=230), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=232), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=230), )] (%581:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=230)], %582:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=232)]) -> (%583:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=230)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=230), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=234), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=233))] (%583:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=230)]) -> (%584:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=234)]) - cf.ReturnOp (%584:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=234)]) -> () + (%8274:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=227)]) -> (%8279:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=235)]) { + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=227), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=230), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, 
block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=229))] (%8274:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=227)]) -> (%8275:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=230)]) + linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=230), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=231), )] (%8275:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=230)]) -> (%8276:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=231)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=227), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=233), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=232))] (%8274:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=227)]) -> (%8277:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=233)]) + linalg.CPU.MulOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=231), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=233), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=231), )] (%8276:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=231)], %8277:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=233)]) -> (%8278:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=231)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=231), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=235), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=234))] (%8278:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=231)]) -> (%8279:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=235)]) + cf.ReturnOp (%8279:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=235)]) -> () } } graph.SubGraphOp 
@model.layers.5 [using_qnn:true, symbol:model.layers.5] { - (%585:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=234)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %330:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=8)], %331:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=36)]) -> (%626:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=268)], %599:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=247)], %601:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=249)]) { - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=234), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=235), )] (%585:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: 
Int16, scale_type: Float32), uuid=234)]) -> (%586:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=235)]) - graph.CallGraphOp @model.layers.5.self_attn (%586:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=235)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %330:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=8)], %331:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=36)]) -> (%618:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=259)], %599:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=247)], %601:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=249)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=259), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 
32767, quant_to_type: Int16, scale_type: Float32), uuid=234), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=259), )] (%618:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=259)], %585:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=234)]) -> (%619:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=259)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=259), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=260), )] (%619:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=259)]) -> (%620:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=260)]) - graph.CallGraphOp @model.layers.5.mlp (%620:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=260)]) -> (%625:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=268)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=268), 
inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=259), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=268), )] (%625:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=268)], %619:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=259)]) -> (%626:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=268)]) - cf.ReturnOp (%626:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=268)], %599:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=247)], %601:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=249)]) -> () + (%8280:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=235)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8025:tensor<[1, 8, 128, 992], 
Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=8)], %8026:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=36)]) -> (%8321:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=269)], %8294:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=248)], %8296:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=250)]) { + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=235), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=236), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=237))] (%8280:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=235)]) -> (%8281:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=236)]) + graph.CallGraphOp @model.layers.5.self_attn (%8281:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=236)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8025:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=8)], %8026:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=36)]) -> (%8313:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=260)], %8294:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=248)], %8296:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=250)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=260), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=235), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=260), )] (%8313:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=260)], %8280:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=235)]) -> 
(%8314:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=260)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=260), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=261), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=262))] (%8314:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=260)]) -> (%8315:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=261)]) + graph.CallGraphOp @model.layers.5.mlp (%8315:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=261)]) -> (%8320:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=269)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=269), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=260), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=269), )] (%8320:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=269)], %8314:tensor<[1, 32, 
2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=260)]) -> (%8321:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=269)]) + cf.ReturnOp (%8321:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=269)], %8294:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=248)], %8296:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=250)]) -> () } } graph.SubGraphOp @model.layers.5.self_attn [using_qnn:true, symbol:model.layers.5.self_attn] { - (%586:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=235)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %330:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=8)], %331:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=36)]) -> (%618:tensor<[1, 32, 2048], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=259)], %599:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=247)], %601:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=249)]) { - linalg.CPU.LinearOp (%586:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=235)]) -> (%587:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=241)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=235), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=238), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=237))] (%586:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=235)]) -> (%588:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=238)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=235), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=240), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 
7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=239))] (%586:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=235)]) -> (%589:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=240)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=241), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=241), )] (%587:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=241)]) -> (%587:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=241)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=241), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=241), )] (%587:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=241)]) -> (%590:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=241)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=238), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=238), )] (%588:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=238)]) -> (%588:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=238)]) - linalg.CPU.TransposeOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=238), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=238), )] (%588:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=238)]) -> (%591:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=238)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=240), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=240), )] (%589:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=240)]) -> (%589:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=240)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=240), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=240), )] (%589:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=240)]) -> (%592:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=240)]) - 
linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=241), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=242), )] (%590:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=241)]) -> (%593:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=242)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=238), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=244), )] (%591:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=238)]) -> (%594:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=244)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=242), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=242), )] (%593:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=242)], %379:tensor<[1, 32, 128], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%595:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=242)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=244), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=244), )] (%594:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=244)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%596:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=244)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=244), outputs_0:QuantSpec(Raw(type: Float16), uuid=246), )] 
(%596:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=244)]) -> (%597:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=246)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=246), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=247), )] (%597:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=246)]) -> (%598:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=247)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=247), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=247), )] (%598:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=247)]) -> (%599:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=247)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=240), outputs_0:QuantSpec(Raw(type: Float16), uuid=248), )] (%592:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=240)]) -> (%600:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=248)]) - linalg.CPU.CastTypeOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=248), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=249), )] (%600:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=248)]) -> (%601:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=249)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=8), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=247), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=8), )] (%330:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=8)], %599:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=247)]) -> (%602:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=8)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=36), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=249), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=36), )] (%331:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=36)], 
%601:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=249)]) -> (%603:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=36)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=8), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=8), )] (%602:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=8)]) -> (%604:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=8)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=36), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=36), )] (%603:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=36)]) -> (%605:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=36)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=242), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=8), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 
32767, quant_to_type: Int16, scale_type: Float32), uuid=250), )] (%595:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=242)], %604:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=8)]) -> (%606:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=250)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=250), inputs_1:QuantSpec(Raw(type: Float32), uuid=251), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=250), )] (%606:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=250)], %607:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=251), constant:[0.088388346]]) -> (%608:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=250)]) - linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=250), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=252), )] (%608:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=250)]) -> (%609:tensor<[1, 16, 32, 1], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=252)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=252), inputs_1:QuantSpec(Raw(type: Int16), uuid=253), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=252), )] (%609:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=252)], %610:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=253), constant:[-20]]) -> (%611:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=252)]) - linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=254), outputs_0:QuantSpec(Raw(type: UInt8), uuid=255), )] (%319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %612:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=254), constant:[0]]) -> (%613:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=255)]) - linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=255), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=250), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=252), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=252), )] (%613:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), 
uuid=255)], %608:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=250)], %611:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=252)]) -> (%614:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=252)]) - linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=252), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=256), )] (%614:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=252)]) -> (%615:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=256)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=256), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=36), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=257), )] (%615:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=256)], %605:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), 
uuid=36)]) -> (%616:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=257)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=257), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=257), )] (%616:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=257)]) -> (%617:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=257)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=257), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=257), )] (%617:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=257)]) -> (%617:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=257)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=257), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=259), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), 
uuid=258))] (%617:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=257)]) -> (%618:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=259)]) - cf.ReturnOp (%618:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=259)], %599:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=247)], %601:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=249)]) -> () + (%8281:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=236)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8025:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=8)], %8026:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=36)]) -> (%8313:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=260)], %8294:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=248)], %8296:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=250)]) { + linalg.CPU.LinearOp (%8281:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=236)]) -> (%8282:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=242)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=236), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=239), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=238))] (%8281:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=236)]) -> (%8283:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=239)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=236), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=241), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, 
quant_to_type: UInt4, scale_1_type: Float32), uuid=240))] (%8281:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=236)]) -> (%8284:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=241)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=242), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=242), )] (%8282:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=242)]) -> (%8282:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=242)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=242), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=242), )] (%8282:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=242)]) -> (%8285:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=242)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=239), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=239), )] (%8283:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=239)]) -> (%8283:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=239)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=239), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=239), )] (%8283:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=239)]) -> (%8286:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=239)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=241), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=241), )] (%8284:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=241)]) -> (%8284:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=241)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=241), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=241), )] (%8284:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=241)]) -> (%8287:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=241)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: 
Int16PerTensor), uuid=242), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=243), weight_weight:QuantSpec(Raw(type: Int16PerTensor), uuid=244))] (%8285:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=242)]) -> (%8288:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=243)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=239), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=245), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=246))] (%8286:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=239)]) -> (%8289:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=245)]) + linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=243), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=243), )] (%8288:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=243)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%8290:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=243)]) + linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=245), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=245), )] (%8289:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=245)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%8291:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=245)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: 
Int16, scale_type: Float32), uuid=245), outputs_0:QuantSpec(Raw(type: Float16), uuid=247), )] (%8291:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=245)]) -> (%8292:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=247)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=247), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=248), )] (%8292:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=247)]) -> (%8293:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=248)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=248), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=248), )] (%8293:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=248)]) -> (%8294:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=248)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=241), outputs_0:QuantSpec(Raw(type: Float16), uuid=249), )] (%8287:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=241)]) -> (%8295:tensor<[1, 8, 32, 128], Float16, 
CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=249)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=249), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=250), )] (%8295:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=249)]) -> (%8296:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=250)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=8), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=248), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=8), )] (%8025:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=8)], %8294:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=248)]) -> (%8297:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=8)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=36), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=250), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=36), )] (%8026:tensor<[1, 8, 992, 128], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=36)], %8296:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=250)]) -> (%8298:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=36)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=8), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=8), )] (%8297:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=8)]) -> (%8299:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=8)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=36), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=36), )] (%8298:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=36)]) -> (%8300:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=36)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=243), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, 
quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=8), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=251), )] (%8290:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=243)], %8299:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=8)]) -> (%8301:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=251)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=251), inputs_1:QuantSpec(Raw(type: Float32), uuid=252), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=251), )] (%8301:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=251)], %8302:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=252), constant:[0.088388346]]) -> (%8303:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=251)]) + linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=251), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=253), )] (%8303:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 
32767, quant_to_type: Int16, scale_type: Float32), uuid=251)]) -> (%8304:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=253)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=253), inputs_1:QuantSpec(Raw(type: Int16), uuid=254), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=253), )] (%8304:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=253)], %8305:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=254), constant:[-20]]) -> (%8306:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=253)]) + linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=255), outputs_0:QuantSpec(Raw(type: UInt8), uuid=256), )] (%8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8307:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=255), constant:[0]]) -> (%8308:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=256)]) + linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=256), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=251), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=253), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, 
scale_type: Float32), uuid=253), )] (%8308:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=256)], %8303:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=251)], %8306:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=253)]) -> (%8309:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=253)]) + linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=253), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=257), )] (%8309:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=253)]) -> (%8310:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=257)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=257), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=36), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=258), )] (%8310:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=257)], %8300:tensor<[1, 16, 1024, 128], 
Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=36)]) -> (%8311:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=258)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=258), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=258), )] (%8311:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=258)]) -> (%8312:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=258)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=258), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=258), )] (%8312:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=258)]) -> (%8312:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=258)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=258), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=260), 
weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=259))] (%8312:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=258)]) -> (%8313:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=260)]) + cf.ReturnOp (%8313:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=260)], %8294:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=248)], %8296:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=250)]) -> () } } graph.SubGraphOp @model.layers.5.mlp [using_qnn:true, symbol:model.layers.5.mlp] { - (%620:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=260)]) -> (%625:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=268)]) { - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=260), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=263), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), 
uuid=262))] (%620:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=260)]) -> (%621:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=263)]) - linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=263), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=264), )] (%621:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=263)]) -> (%622:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=264)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=260), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=266), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=265))] (%620:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=260)]) -> (%623:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=266)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: 
Int16, scale_type: Float32), uuid=264), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=266), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=264), )] (%622:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=264)], %623:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=266)]) -> (%624:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=264)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=264), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=268), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=267))] (%624:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=264)]) -> (%625:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=268)]) - cf.ReturnOp (%625:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=268)]) -> () + (%8315:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: 
Int16, scale_type: Float32), uuid=261)]) -> (%8320:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=269)]) { + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=261), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=264), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=263))] (%8315:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=261)]) -> (%8316:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=264)]) + linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=264), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=265), )] (%8316:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=264)]) -> (%8317:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=265)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=261), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, 
scale_type: Float32), uuid=267), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=266))] (%8315:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=261)]) -> (%8318:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=267)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=265), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=267), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=265), )] (%8317:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=265)], %8318:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=267)]) -> (%8319:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=265)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=265), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=269), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), 
uuid=268))] (%8319:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=265)]) -> (%8320:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=269)]) + cf.ReturnOp (%8320:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=269)]) -> () } } graph.SubGraphOp @model.layers.6 [using_qnn:true, symbol:model.layers.6] { - (%626:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=268)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %332:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=9)], %333:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=37)]) -> (%667:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=302)], %640:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=281)], %642:tensor<[1, 8, 32, 
128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=283)]) { - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=268), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=269), )] (%626:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=268)]) -> (%627:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=269)]) - graph.CallGraphOp @model.layers.6.self_attn (%627:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=269)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %332:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=9)], %333:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=37)]) -> (%659:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=293)], %640:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=281)], %642:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=283)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=293), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=268), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=293), )] (%659:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=293)], %626:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=268)]) -> (%660:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=293)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=293), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=294), )] (%660:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=293)]) -> (%661:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=294)]) - 
graph.CallGraphOp @model.layers.6.mlp (%661:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=294)]) -> (%666:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=302)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=302), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=293), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=302), )] (%666:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=302)], %660:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=293)]) -> (%667:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=302)]) - cf.ReturnOp (%667:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=302)], %640:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=281)], %642:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=283)]) -> () + (%8321:tensor<[1, 32, 2048], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=269)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8027:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=9)], %8028:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=37)]) -> (%8362:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=303)], %8335:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=282)], %8337:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=284)]) { + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=269), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=270), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=271))] (%8321:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=269)]) -> (%8322:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=270)]) + graph.CallGraphOp @model.layers.6.self_attn (%8322:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=270)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8027:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=9)], %8028:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=37)]) -> (%8354:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=294)], %8335:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=282)], %8337:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=284)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=294), 
inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=269), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=294), )] (%8354:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=294)], %8321:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=269)]) -> (%8355:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=294)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=294), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=295), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=296))] (%8355:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=294)]) -> (%8356:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=295)]) + graph.CallGraphOp @model.layers.6.mlp (%8356:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=295)]) -> (%8361:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=303)]) + 
linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=303), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=294), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=303), )] (%8361:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=303)], %8355:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=294)]) -> (%8362:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=303)]) + cf.ReturnOp (%8362:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=303)], %8335:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=282)], %8337:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=284)]) -> () } } graph.SubGraphOp @model.layers.6.self_attn [using_qnn:true, symbol:model.layers.6.self_attn] { - (%627:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=269)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %332:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=9)], %333:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=37)]) -> (%659:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=293)], %640:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=281)], %642:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=283)]) { - linalg.CPU.LinearOp (%627:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=269)]) -> (%628:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=275)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=269), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=272), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=271))] (%627:tensor<[1, 32, 2048], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=269)]) -> (%629:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=272)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=269), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=274), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=273))] (%627:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=269)]) -> (%630:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=274)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=275), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=275), )] (%628:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=275)]) -> (%628:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=275)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=275), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=275), )] (%628:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=275)]) -> (%631:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=275)]) - linalg.CPU.ViewOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=272), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=272), )] (%629:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=272)]) -> (%629:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=272)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=272), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=272), )] (%629:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=272)]) -> (%632:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=272)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=274), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=274), )] (%630:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=274)]) -> (%630:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=274)]) - 
linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=274), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=274), )] (%630:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=274)]) -> (%633:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=274)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=275), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=276), )] (%631:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=275)]) -> (%634:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=276)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=272), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=278), )] (%632:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=272)]) -> (%635:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=278)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=276), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=276), )] (%634:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=276)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%636:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=276)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=278), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=278), )] (%635:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=278)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%637:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=278)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=278), outputs_0:QuantSpec(Raw(type: Float16), uuid=280), )] (%637:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=278)]) -> (%638:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=280)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=280), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=281), )] (%638:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=280)]) -> (%639:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=281)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=281), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=281), )] (%639:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=281)]) -> (%640:tensor<[1, 8, 128, 32], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=281)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=274), outputs_0:QuantSpec(Raw(type: Float16), uuid=282), )] (%633:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=274)]) -> (%641:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=282)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=282), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=283), )] (%641:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=282)]) -> (%642:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=283)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=9), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=281), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=9), )] (%332:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=9)], %640:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=281)]) -> (%643:tensor<[1, 8, 128, 1024], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=9)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=37), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=283), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=37), )] (%333:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=37)], %642:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=283)]) -> (%644:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=37)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=9), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=9), )] (%643:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=9)]) -> (%645:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=9)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=37), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=37), )] 
(%644:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=37)]) -> (%646:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=37)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=276), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=9), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=284), )] (%636:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=276)], %645:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=9)]) -> (%647:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=284)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=284), inputs_1:QuantSpec(Raw(type: Float32), uuid=285), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=284), )] (%647:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=284)], %648:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=285), constant:[0.088388346]]) -> (%649:tensor<[1, 16, 32, 1024], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=284)]) - linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=284), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=286), )] (%649:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=284)]) -> (%650:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=286)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=286), inputs_1:QuantSpec(Raw(type: Int16), uuid=287), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=286), )] (%650:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=286)], %651:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=287), constant:[-20]]) -> (%652:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=286)]) - linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=288), outputs_0:QuantSpec(Raw(type: UInt8), uuid=289), )] (%319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %653:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), 
uuid=288), constant:[0]]) -> (%654:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=289)]) - linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=289), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=284), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=286), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=286), )] (%654:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=289)], %649:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=284)], %652:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=286)]) -> (%655:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=286)]) - linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=286), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=290), )] (%655:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=286)]) -> (%656:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=290)]) - linalg.CPU.MatMulOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=290), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=37), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=291), )] (%656:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=290)], %646:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=37)]) -> (%657:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=291)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=291), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=291), )] (%657:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=291)]) -> (%658:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=291)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=291), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=291), )] (%658:tensor<[1, 32, 16, 128], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=291)]) -> (%658:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=291)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=291), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=293), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=292))] (%658:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=291)]) -> (%659:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=293)]) - cf.ReturnOp (%659:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=293)], %640:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=281)], %642:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=283)]) -> () + (%8322:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=270)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8027:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=9)], %8028:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=37)]) -> (%8354:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=294)], %8335:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=282)], %8337:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=284)]) { + linalg.CPU.LinearOp (%8322:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=270)]) -> (%8323:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=276)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=270), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=273), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=272))] 
(%8322:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=270)]) -> (%8324:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=273)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=270), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=275), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=274))] (%8322:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=270)]) -> (%8325:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=275)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=276), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=276), )] (%8323:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=276)]) -> (%8323:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=276)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=276), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=276), )] (%8323:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=276)]) -> (%8326:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=276)]) 
+ linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=273), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=273), )] (%8324:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=273)]) -> (%8324:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=273)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=273), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=273), )] (%8324:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=273)]) -> (%8327:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=273)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=275), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=275), )] (%8325:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=275)]) -> (%8325:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=275)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=275), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=275), )] (%8325:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=275)]) -> (%8328:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=275)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=276), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=277), weight_weight:QuantSpec(Raw(type: Int16PerTensor), uuid=278))] (%8326:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=276)]) -> (%8329:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=277)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=273), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=279), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=280))] (%8327:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=273)]) -> (%8330:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 
-32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=279)]) + linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=277), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=277), )] (%8329:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=277)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%8331:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=277)]) + linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=279), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=279), )] (%8330:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 
-32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=279)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%8332:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=279)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=279), outputs_0:QuantSpec(Raw(type: Float16), uuid=281), )] (%8332:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=279)]) -> (%8333:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=281)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=281), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=282), )] (%8333:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=281)]) -> (%8334:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=282)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=282), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=282), )] (%8334:tensor<[1, 8, 32, 128], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=282)]) -> (%8335:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=282)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=275), outputs_0:QuantSpec(Raw(type: Float16), uuid=283), )] (%8328:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=275)]) -> (%8336:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=283)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=283), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=284), )] (%8336:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=283)]) -> (%8337:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=284)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=9), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=282), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=9), )] (%8027:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=9)], %8335:tensor<[1, 8, 128, 32], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=282)]) -> (%8338:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=9)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=37), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=284), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=37), )] (%8028:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=37)], %8337:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=284)]) -> (%8339:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=37)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=9), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=9), )] (%8338:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=9)]) -> (%8340:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=9)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, 
quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=37), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=37), )] (%8339:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=37)]) -> (%8341:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=37)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=277), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=9), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=285), )] (%8331:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=277)], %8340:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=9)]) -> (%8342:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=285)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=285), inputs_1:QuantSpec(Raw(type: Float32), uuid=286), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=285), )] (%8342:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=285)], %8343:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=286), constant:[0.088388346]]) -> (%8344:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=285)]) + linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=285), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=287), )] (%8344:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=285)]) -> (%8345:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=287)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=287), inputs_1:QuantSpec(Raw(type: Int16), uuid=288), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=287), )] (%8345:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=287)], %8346:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=288), constant:[-20]]) -> (%8347:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=287)]) + linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=289), 
outputs_0:QuantSpec(Raw(type: UInt8), uuid=290), )] (%8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8348:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=289), constant:[0]]) -> (%8349:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=290)]) + linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=290), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=285), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=287), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=287), )] (%8349:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=290)], %8344:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=285)], %8347:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=287)]) -> (%8350:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=287)]) + linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=287), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=291), )] (%8350:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=287)]) -> (%8351:tensor<[1, 16, 32, 1024], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=291)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=291), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=37), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=292), )] (%8351:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=291)], %8341:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=37)]) -> (%8352:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=292)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=292), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=292), )] (%8352:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=292)]) -> (%8353:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=292)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=292), 
outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=292), )] (%8353:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=292)]) -> (%8353:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=292)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=292), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=294), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=293))] (%8353:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=292)]) -> (%8354:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=294)]) + cf.ReturnOp (%8354:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=294)], %8335:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=282)], %8337:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=284)]) -> () } } graph.SubGraphOp @model.layers.6.mlp [using_qnn:true, symbol:model.layers.6.mlp] { - (%661:tensor<[1, 32, 2048], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=294)]) -> (%666:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=302)]) { - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=294), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=297), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=296))] (%661:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=294)]) -> (%662:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=297)]) - linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=297), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=298), )] (%662:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=297)]) -> (%663:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=298)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=294), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=300), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=299))] (%661:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=294)]) -> (%664:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=300)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=298), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=300), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=298), )] (%663:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=298)], %664:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=300)]) -> (%665:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=298)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=298), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=302), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 
32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=301))] (%665:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=298)]) -> (%666:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=302)]) - cf.ReturnOp (%666:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=302)]) -> () + (%8356:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=295)]) -> (%8361:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=303)]) { + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=295), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=298), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=297))] (%8356:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=295)]) -> (%8357:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=298)]) + linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, 
scale_type: Float32), uuid=298), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=299), )] (%8357:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=298)]) -> (%8358:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=299)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=295), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=301), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=300))] (%8356:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=295)]) -> (%8359:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=301)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=299), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=301), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=299), )] (%8358:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=299)], %8359:tensor<[1, 32, 6144], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=301)]) -> (%8360:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=299)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=299), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=303), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=302))] (%8360:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=299)]) -> (%8361:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=303)]) + cf.ReturnOp (%8361:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=303)]) -> () } } graph.SubGraphOp @model.layers.7 [using_qnn:true, symbol:model.layers.7] { - (%667:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=302)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 
1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %334:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=10)], %335:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=38)]) -> (%708:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=336)], %681:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=315)], %683:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=317)]) { - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=302), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=303), )] (%667:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=302)]) -> (%668:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=303)]) - graph.CallGraphOp @model.layers.7.self_attn (%668:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=303)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %334:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=10)], %335:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=38)]) -> (%700:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=327)], %681:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=315)], %683:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=317)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=327), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=302), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=327), )] (%700:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=327)], %667:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=302)]) -> (%701:tensor<[1, 32, 2048], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=327)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=327), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=328), )] (%701:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=327)]) -> (%702:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=328)]) - graph.CallGraphOp @model.layers.7.mlp (%702:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=328)]) -> (%707:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=336)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=336), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=327), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=336), )] (%707:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=336)], %701:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=327)]) -> (%708:tensor<[1, 
32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=336)]) - cf.ReturnOp (%708:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=336)], %681:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=315)], %683:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=317)]) -> () + (%8362:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=303)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8029:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=10)], %8030:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=38)]) -> (%8403:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=337)], %8376:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: 
Int8, scale_type: Float32), uuid=316)], %8378:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=318)]) { + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=303), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=304), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=305))] (%8362:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=303)]) -> (%8363:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=304)]) + graph.CallGraphOp @model.layers.7.self_attn (%8363:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=304)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8029:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=10)], %8030:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, 
scale_type: Float32), uuid=38)]) -> (%8395:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=328)], %8376:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=316)], %8378:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=318)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=328), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=303), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=328), )] (%8395:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=328)], %8362:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=303)]) -> (%8396:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=328)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=328), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=329), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=330))] (%8396:tensor<[1, 32, 2048], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=328)]) -> (%8397:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=329)]) + graph.CallGraphOp @model.layers.7.mlp (%8397:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=329)]) -> (%8402:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=337)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=337), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=328), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=337), )] (%8402:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=337)], %8396:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=328)]) -> (%8403:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=337)]) + cf.ReturnOp (%8403:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=337)], %8376:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, 
quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=316)], %8378:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=318)]) -> () } } graph.SubGraphOp @model.layers.7.self_attn [using_qnn:true, symbol:model.layers.7.self_attn] { - (%668:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=303)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %334:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=10)], %335:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=38)]) -> (%700:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=327)], %681:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=315)], %683:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=317)]) { - linalg.CPU.LinearOp (%668:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=303)]) -> (%669:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=309)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=303), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=306), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=305))] (%668:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=303)]) -> (%670:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=306)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=303), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=308), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=307))] (%668:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=303)]) -> (%671:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=308)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=309), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=309), 
)] (%669:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=309)]) -> (%669:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=309)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=309), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=309), )] (%669:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=309)]) -> (%672:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=309)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=306), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=306), )] (%670:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=306)]) -> (%670:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=306)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=306), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=306), )] (%670:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=306)]) -> (%673:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=306)]) - linalg.CPU.ViewOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=308), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=308), )] (%671:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=308)]) -> (%671:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=308)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=308), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=308), )] (%671:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=308)]) -> (%674:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=308)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=309), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=310), )] (%672:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=309)]) -> (%675:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=310)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, 
scale_type: Float32), uuid=306), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=312), )] (%673:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=306)]) -> (%676:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=312)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=310), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=310), )] (%675:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=310)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%677:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=310)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=312), 
inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=312), )] (%676:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=312)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%678:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=312)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=312), outputs_0:QuantSpec(Raw(type: Float16), uuid=314), )] (%678:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=312)]) -> (%679:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=314)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=314), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=315), )] (%679:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=314)]) -> (%680:tensor<[1, 8, 32, 128], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=315)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=315), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=315), )] (%680:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=315)]) -> (%681:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=315)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=308), outputs_0:QuantSpec(Raw(type: Float16), uuid=316), )] (%674:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=308)]) -> (%682:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=316)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=316), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=317), )] (%682:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=316)]) -> (%683:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=317)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=10), 
inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=315), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=10), )] (%334:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=10)], %681:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=315)]) -> (%684:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=10)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=38), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=317), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=38), )] (%335:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=38)], %683:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=317)]) -> (%685:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=38)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=10), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=10), )] 
(%684:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=10)]) -> (%686:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=10)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=38), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=38), )] (%685:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=38)]) -> (%687:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=38)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=310), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=10), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=318), )] (%677:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=310)], %686:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=10)]) -> (%688:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=318)]) - linalg.CPU.MulOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=318), inputs_1:QuantSpec(Raw(type: Float32), uuid=319), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=318), )] (%688:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=318)], %689:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=319), constant:[0.088388346]]) -> (%690:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=318)]) - linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=318), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=320), )] (%690:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=318)]) -> (%691:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=320)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=320), inputs_1:QuantSpec(Raw(type: Int16), uuid=321), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=320), )] (%691:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=320)], %692:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=321), constant:[-20]]) -> (%693:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=320)]) - linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=322), outputs_0:QuantSpec(Raw(type: UInt8), uuid=323), )] (%319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %694:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=322), constant:[0]]) -> (%695:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=323)]) - linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=323), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=318), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=320), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=320), )] (%695:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=323)], %690:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=318)], %693:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=320)]) -> (%696:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=320)]) - linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 
-32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=320), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=324), )] (%696:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=320)]) -> (%697:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=324)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=324), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=38), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=325), )] (%697:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=324)], %687:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=38)]) -> (%698:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=325)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=325), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=325), )] (%698:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, 
scale_type: Float32), uuid=325)]) -> (%699:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=325)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=325), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=325), )] (%699:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=325)]) -> (%699:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=325)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=325), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=327), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=326))] (%699:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=325)]) -> (%700:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=327)]) - cf.ReturnOp (%700:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=327)], %681:tensor<[1, 8, 128, 32], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=315)], %683:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=317)]) -> () + (%8363:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=304)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8029:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=10)], %8030:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=38)]) -> (%8395:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=328)], %8376:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=316)], %8378:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=318)]) { + linalg.CPU.LinearOp (%8363:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: 
Float32), uuid=304)]) -> (%8364:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=310)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=304), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=307), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=306))] (%8363:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=304)]) -> (%8365:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=307)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=304), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=309), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=308))] (%8363:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=304)]) -> (%8366:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=309)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=310), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=310), )] (%8364:tensor<[1, 32, 
2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=310)]) -> (%8364:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=310)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=310), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=310), )] (%8364:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=310)]) -> (%8367:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=310)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=307), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=307), )] (%8365:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=307)]) -> (%8365:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=307)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=307), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=307), )] (%8365:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=307)]) -> (%8368:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=307)]) + linalg.CPU.ViewOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=309), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=309), )] (%8366:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=309)]) -> (%8366:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=309)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=309), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=309), )] (%8366:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=309)]) -> (%8369:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=309)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=310), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=311), weight_weight:QuantSpec(Raw(type: Int16PerTensor), uuid=312))] (%8367:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=310)]) -> (%8370:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=311)]) + linalg.CPU.RMSNormOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=307), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=313), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=314))] (%8368:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=307)]) -> (%8371:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=313)]) + linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=311), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=311), )] (%8370:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=311)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%8372:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: 
Int16, scale_type: Float32), uuid=311)]) + linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=313), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=313), )] (%8371:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=313)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%8373:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=313)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=313), outputs_0:QuantSpec(Raw(type: Float16), uuid=315), )] (%8373:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=313)]) -> (%8374:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=315)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=315), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, 
quant_to_type: Int8, scale_type: Float32), uuid=316), )] (%8374:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=315)]) -> (%8375:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=316)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=316), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=316), )] (%8375:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=316)]) -> (%8376:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=316)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=309), outputs_0:QuantSpec(Raw(type: Float16), uuid=317), )] (%8369:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=309)]) -> (%8377:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=317)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=317), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=318), )] (%8377:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=317)]) -> (%8378:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), 
uuid=318)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=10), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=316), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=10), )] (%8029:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=10)], %8376:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=316)]) -> (%8379:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=10)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=38), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=318), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=38), )] (%8030:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=38)], %8378:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=318)]) -> (%8380:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=38)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, 
quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=10), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=10), )] (%8379:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=10)]) -> (%8381:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=10)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=38), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=38), )] (%8380:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=38)]) -> (%8382:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=38)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=311), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=10), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=319), )] (%8372:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=311)], %8381:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=10)]) -> (%8383:tensor<[1, 16, 
32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=319)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=319), inputs_1:QuantSpec(Raw(type: Float32), uuid=320), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=319), )] (%8383:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=319)], %8384:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=320), constant:[0.088388346]]) -> (%8385:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=319)]) + linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=319), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=321), )] (%8385:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=319)]) -> (%8386:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=321)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=321), inputs_1:QuantSpec(Raw(type: Int16), uuid=322), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=321), )] (%8386:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=321)], %8387:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=322), constant:[-20]]) -> (%8388:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=321)]) + linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=323), outputs_0:QuantSpec(Raw(type: UInt8), uuid=324), )] (%8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8389:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=323), constant:[0]]) -> (%8390:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=324)]) + linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=324), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=319), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=321), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=321), )] (%8390:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=324)], %8385:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=319)], %8388:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=321)]) -> (%8391:tensor<[1, 16, 32, 1024], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=321)]) + linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=321), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=325), )] (%8391:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=321)]) -> (%8392:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=325)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=325), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=38), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=326), )] (%8392:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=325)], %8382:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=38)]) -> (%8393:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=326)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=326), outputs_0:QuantSpec(SymPerTensor(quant_min: 
-32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=326), )] (%8393:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=326)]) -> (%8394:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=326)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=326), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=326), )] (%8394:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=326)]) -> (%8394:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=326)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=326), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=328), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=327))] (%8394:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=326)]) -> (%8395:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=328)]) + cf.ReturnOp (%8395:tensor<[1, 32, 2048], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=328)], %8376:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=316)], %8378:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=318)]) -> () } } graph.SubGraphOp @model.layers.7.mlp [using_qnn:true, symbol:model.layers.7.mlp] { - (%702:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=328)]) -> (%707:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=336)]) { - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=328), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=331), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=330))] (%702:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=328)]) -> (%703:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=331)]) - linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=331), 
outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=332), )] (%703:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=331)]) -> (%704:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=332)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=328), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=334), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=333))] (%702:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=328)]) -> (%705:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=334)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=332), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=334), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=332), )] (%704:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=332)], %705:tensor<[1, 32, 6144], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=334)]) -> (%706:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=332)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=332), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=336), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=335))] (%706:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=332)]) -> (%707:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=336)]) - cf.ReturnOp (%707:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=336)]) -> () + (%8397:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=329)]) -> (%8402:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=337)]) { + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=329), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=332), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=331))] (%8397:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=329)]) -> (%8398:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=332)]) + linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=332), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=333), )] (%8398:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=332)]) -> (%8399:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=333)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=329), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=335), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=334))] (%8397:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=329)]) -> (%8400:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: 
Int16, scale_type: Float32), uuid=335)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=333), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=335), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=333), )] (%8399:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=333)], %8400:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=335)]) -> (%8401:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=333)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=333), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=337), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=336))] (%8401:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=333)]) -> (%8402:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=337)]) + cf.ReturnOp (%8402:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, 
scale_type: Float32), uuid=337)]) -> () } } graph.SubGraphOp @model.layers.8 [using_qnn:true, symbol:model.layers.8] { - (%708:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=336)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %336:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=11)], %337:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=39)]) -> (%749:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=370)], %722:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=349)], %724:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=351)]) { - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=336), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=337), )] (%708:tensor<[1, 32, 2048], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=336)]) -> (%709:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=337)]) - graph.CallGraphOp @model.layers.8.self_attn (%709:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=337)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %336:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=11)], %337:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=39)]) -> (%741:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=361)], %722:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=349)], %724:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=351)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, 
scale_type: Float32), uuid=361), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=336), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=361), )] (%741:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=361)], %708:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=336)]) -> (%742:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=361)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=361), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=362), )] (%742:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=361)]) -> (%743:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=362)]) - graph.CallGraphOp @model.layers.8.mlp (%743:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=362)]) -> (%748:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=370)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 
32767, quant_to_type: Int16, scale_type: Float32), uuid=370), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=361), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=370), )] (%748:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=370)], %742:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=361)]) -> (%749:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=370)]) - cf.ReturnOp (%749:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=370)], %722:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=349)], %724:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=351)]) -> () + (%8403:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=337)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, 
CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8031:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=11)], %8032:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=39)]) -> (%8444:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=371)], %8417:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=350)], %8419:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=352)]) { + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=337), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=338), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=339))] (%8403:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=337)]) -> (%8404:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=338)]) + graph.CallGraphOp @model.layers.8.self_attn (%8404:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=338)], %8074:tensor<[1, 32, 128], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8031:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=11)], %8032:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=39)]) -> (%8436:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=362)], %8417:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=350)], %8419:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=352)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=362), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=337), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=362), )] (%8436:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=362)], %8403:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=337)]) -> (%8437:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=362)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=362), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=363), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=364))] (%8437:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=362)]) -> (%8438:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=363)]) + graph.CallGraphOp @model.layers.8.mlp (%8438:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=363)]) -> (%8443:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=371)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=371), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=362), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=371), )] (%8443:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=371)], %8437:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=362)]) -> (%8444:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=371)]) + cf.ReturnOp (%8444:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=371)], %8417:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=350)], %8419:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=352)]) -> () } } graph.SubGraphOp @model.layers.8.self_attn [using_qnn:true, symbol:model.layers.8.self_attn] { - (%709:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=337)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %336:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=11)], %337:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, 
scale_type: Float32), uuid=39)]) -> (%741:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=361)], %722:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=349)], %724:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=351)]) { - linalg.CPU.LinearOp (%709:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=337)]) -> (%710:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=343)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=337), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=340), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=339))] (%709:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=337)]) -> (%711:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=340)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=337), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=342), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=341))] (%709:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=337)]) -> (%712:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=342)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=343), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=343), )] (%710:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=343)]) -> (%710:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=343)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=343), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=343), )] (%710:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=343)]) -> (%713:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=343)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=340), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=340), )] (%711:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=340)]) -> (%711:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=340)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=340), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=340), )] (%711:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=340)]) -> (%714:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=340)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=342), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=342), )] (%712:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=342)]) -> (%712:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=342)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=342), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=342), )] (%712:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=342)]) -> (%715:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: 
Float32), uuid=342)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=343), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=344), )] (%713:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=343)]) -> (%716:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=344)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=340), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=346), )] (%714:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=340)]) -> (%717:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=346)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=344), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=344), )] (%716:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=344)], %379:tensor<[1, 32, 128], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%718:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=344)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=346), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=346), )] (%717:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=346)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%719:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=346)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=346), outputs_0:QuantSpec(Raw(type: Float16), uuid=348), )] 
(%719:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=346)]) -> (%720:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=348)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=348), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=349), )] (%720:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=348)]) -> (%721:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=349)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=349), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=349), )] (%721:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=349)]) -> (%722:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=349)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=342), outputs_0:QuantSpec(Raw(type: Float16), uuid=350), )] (%715:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=342)]) -> (%723:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=350)]) - linalg.CPU.CastTypeOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=350), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=351), )] (%723:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=350)]) -> (%724:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=351)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=11), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=349), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=11), )] (%336:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=11)], %722:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=349)]) -> (%725:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=11)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=39), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=351), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=39), )] (%337:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=39)], 
%724:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=351)]) -> (%726:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=39)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=11), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=11), )] (%725:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=11)]) -> (%727:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=11)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=39), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=39), )] (%726:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=39)]) -> (%728:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=39)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=344), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=11), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 
32767, quant_to_type: Int16, scale_type: Float32), uuid=352), )] (%718:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=344)], %727:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=11)]) -> (%729:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=352)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=352), inputs_1:QuantSpec(Raw(type: Float32), uuid=353), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=352), )] (%729:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=352)], %730:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=353), constant:[0.088388346]]) -> (%731:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=352)]) - linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=352), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=354), )] (%731:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=352)]) -> (%732:tensor<[1, 16, 32, 1], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=354)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=354), inputs_1:QuantSpec(Raw(type: Int16), uuid=355), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=354), )] (%732:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=354)], %733:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=355), constant:[-20]]) -> (%734:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=354)]) - linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=356), outputs_0:QuantSpec(Raw(type: UInt8), uuid=357), )] (%319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %735:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=356), constant:[1]]) -> (%736:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=357)]) - linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=357), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=352), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=354), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=354), )] (%736:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), 
uuid=357)], %731:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=352)], %734:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=354)]) -> (%737:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=354)]) - linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=354), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=358), )] (%737:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=354)]) -> (%738:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=358)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=358), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=39), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=359), )] (%738:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=358)], %728:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), 
uuid=39)]) -> (%739:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=359)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=359), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=359), )] (%739:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=359)]) -> (%740:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=359)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=359), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=359), )] (%740:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=359)]) -> (%740:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=359)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=359), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=361), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), 
uuid=360))] (%740:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=359)]) -> (%741:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=361)]) - cf.ReturnOp (%741:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=361)], %722:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=349)], %724:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=351)]) -> () + (%8404:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=338)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8031:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=11)], %8032:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=39)]) -> (%8436:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=362)], %8417:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=350)], %8419:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=352)]) { + linalg.CPU.LinearOp (%8404:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=338)]) -> (%8405:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=344)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=338), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=341), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=340))] (%8404:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=338)]) -> (%8406:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=341)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=338), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=343), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, 
quant_to_type: UInt4, scale_1_type: Float32), uuid=342))] (%8404:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=338)]) -> (%8407:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=343)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=344), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=344), )] (%8405:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=344)]) -> (%8405:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=344)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=344), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=344), )] (%8405:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=344)]) -> (%8408:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=344)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=341), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=341), )] (%8406:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=341)]) -> (%8406:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=341)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=341), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=341), )] (%8406:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=341)]) -> (%8409:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=341)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=343), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=343), )] (%8407:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=343)]) -> (%8407:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=343)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=343), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=343), )] (%8407:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=343)]) -> (%8410:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=343)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: 
Int16PerTensor), uuid=344), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=345), weight_weight:QuantSpec(Raw(type: Int16PerTensor), uuid=346))] (%8408:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=344)]) -> (%8411:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=345)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=341), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=347), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=348))] (%8409:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=341)]) -> (%8412:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=347)]) + linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=345), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=345), )] (%8411:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=345)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%8413:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=345)]) + linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=347), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=347), )] (%8412:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=347)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%8414:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=347)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: 
Int16, scale_type: Float32), uuid=347), outputs_0:QuantSpec(Raw(type: Float16), uuid=349), )] (%8414:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=347)]) -> (%8415:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=349)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=349), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=350), )] (%8415:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=349)]) -> (%8416:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=350)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=350), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=350), )] (%8416:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=350)]) -> (%8417:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=350)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=343), outputs_0:QuantSpec(Raw(type: Float16), uuid=351), )] (%8410:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=343)]) -> (%8418:tensor<[1, 8, 32, 128], Float16, 
CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=351)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=351), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=352), )] (%8418:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=351)]) -> (%8419:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=352)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=11), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=350), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=11), )] (%8031:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=11)], %8417:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=350)]) -> (%8420:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=11)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=39), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=352), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=39), )] (%8032:tensor<[1, 8, 992, 128], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=39)], %8419:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=352)]) -> (%8421:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=39)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=11), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=11), )] (%8420:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=11)]) -> (%8422:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=11)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=39), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=39), )] (%8421:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=39)]) -> (%8423:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=39)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=345), inputs_1:QuantSpec(SymPerTensor(quant_min: 
-128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=11), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=353), )] (%8413:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=345)], %8422:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=11)]) -> (%8424:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=353)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=353), inputs_1:QuantSpec(Raw(type: Float32), uuid=354), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=353), )] (%8424:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=353)], %8425:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=354), constant:[0.088388346]]) -> (%8426:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=353)]) + linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=353), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=355), )] (%8426:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=353)]) -> (%8427:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=355)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=355), inputs_1:QuantSpec(Raw(type: Int16), uuid=356), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=355), )] (%8427:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=355)], %8428:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=356), constant:[-20]]) -> (%8429:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=355)]) + linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=357), outputs_0:QuantSpec(Raw(type: UInt8), uuid=358), )] (%8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8430:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=357), constant:[1]]) -> (%8431:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=358)]) + linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=358), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=353), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=355), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: 
Int16, scale_type: Float32), uuid=355), )] (%8431:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=358)], %8426:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=353)], %8429:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=355)]) -> (%8432:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=355)]) + linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=355), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=359), )] (%8432:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=355)]) -> (%8433:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=359)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=359), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=39), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=360), )] (%8433:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=359)], %8423:tensor<[1, 16, 1024, 128], 
Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=39)]) -> (%8434:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=360)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=360), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=360), )] (%8434:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=360)]) -> (%8435:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=360)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=360), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=360), )] (%8435:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=360)]) -> (%8435:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=360)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=360), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=362), 
weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=361))] (%8435:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=360)]) -> (%8436:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=362)]) + cf.ReturnOp (%8436:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=362)], %8417:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=350)], %8419:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=352)]) -> () } } graph.SubGraphOp @model.layers.8.mlp [using_qnn:true, symbol:model.layers.8.mlp] { - (%743:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=362)]) -> (%748:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=370)]) { - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=362), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=365), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), 
uuid=364))] (%743:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=362)]) -> (%744:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=365)]) - linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=365), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=366), )] (%744:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=365)]) -> (%745:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=366)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=362), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=368), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=367))] (%743:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=362)]) -> (%746:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=368)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: 
Int16, scale_type: Float32), uuid=366), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=368), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=366), )] (%745:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=366)], %746:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=368)]) -> (%747:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=366)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=366), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=370), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=369))] (%747:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=366)]) -> (%748:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=370)]) - cf.ReturnOp (%748:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=370)]) -> () + (%8438:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: 
Int16, scale_type: Float32), uuid=363)]) -> (%8443:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=371)]) { + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=363), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=366), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=365))] (%8438:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=363)]) -> (%8439:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=366)]) + linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=366), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=367), )] (%8439:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=366)]) -> (%8440:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=367)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=363), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, 
scale_type: Float32), uuid=369), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=368))] (%8438:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=363)]) -> (%8441:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=369)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=367), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=369), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=367), )] (%8440:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=367)], %8441:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=369)]) -> (%8442:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=367)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=367), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=371), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), 
uuid=370))] (%8442:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=367)]) -> (%8443:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=371)]) + cf.ReturnOp (%8443:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=371)]) -> () } } graph.SubGraphOp @model.layers.9 [using_qnn:true, symbol:model.layers.9] { - (%749:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=370)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %338:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=12)], %339:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=40)]) -> (%790:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=404)], %763:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=383)], %765:tensor<[1, 8, 32, 
128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=385)]) { - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=370), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=371), )] (%749:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=370)]) -> (%750:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=371)]) - graph.CallGraphOp @model.layers.9.self_attn (%750:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=371)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %338:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=12)], %339:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=40)]) -> (%782:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=395)], %763:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=383)], %765:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=385)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=395), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=370), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=395), )] (%782:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=395)], %749:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=370)]) -> (%783:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=395)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=395), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=396), )] (%783:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=395)]) -> (%784:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=396)]) - 
graph.CallGraphOp @model.layers.9.mlp (%784:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=396)]) -> (%789:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=404)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=404), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=395), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=404), )] (%789:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=404)], %783:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=395)]) -> (%790:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=404)]) - cf.ReturnOp (%790:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=404)], %763:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=383)], %765:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=385)]) -> () + (%8444:tensor<[1, 32, 2048], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=371)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8033:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=12)], %8034:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=40)]) -> (%8485:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=405)], %8458:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=384)], %8460:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=386)]) { + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=371), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=372), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=373))] (%8444:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=371)]) -> (%8445:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=372)]) + graph.CallGraphOp @model.layers.9.self_attn (%8445:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=372)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8033:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=12)], %8034:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=40)]) -> (%8477:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=396)], %8458:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=384)], %8460:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=386)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=396), 
inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=371), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=396), )] (%8477:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=396)], %8444:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=371)]) -> (%8478:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=396)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=396), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=397), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=398))] (%8478:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=396)]) -> (%8479:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=397)]) + graph.CallGraphOp @model.layers.9.mlp (%8479:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=397)]) -> (%8484:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=405)]) + 
linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=405), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=396), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=405), )] (%8484:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=405)], %8478:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=396)]) -> (%8485:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=405)]) + cf.ReturnOp (%8485:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=405)], %8458:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=384)], %8460:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=386)]) -> () } } graph.SubGraphOp @model.layers.9.self_attn [using_qnn:true, symbol:model.layers.9.self_attn] { - (%750:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=371)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %338:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=12)], %339:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=40)]) -> (%782:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=395)], %763:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=383)], %765:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=385)]) { - linalg.CPU.LinearOp (%750:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=371)]) -> (%751:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=377)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=371), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=374), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=373))] (%750:tensor<[1, 32, 2048], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=371)]) -> (%752:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=374)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=371), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=376), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=375))] (%750:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=371)]) -> (%753:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=376)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=377), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=377), )] (%751:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=377)]) -> (%751:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=377)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=377), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=377), )] (%751:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=377)]) -> (%754:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=377)]) - linalg.CPU.ViewOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=374), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=374), )] (%752:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=374)]) -> (%752:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=374)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=374), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=374), )] (%752:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=374)]) -> (%755:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=374)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=376), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=376), )] (%753:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=376)]) -> (%753:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=376)]) - 
linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=376), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=376), )] (%753:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=376)]) -> (%756:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=376)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=377), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=378), )] (%754:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=377)]) -> (%757:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=378)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=374), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=380), )] (%755:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=374)]) -> (%758:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=380)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=378), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=378), )] (%757:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=378)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%759:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=378)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=380), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=380), )] (%758:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=380)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%760:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=380)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=380), outputs_0:QuantSpec(Raw(type: Float16), uuid=382), )] (%760:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=380)]) -> (%761:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=382)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=382), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=383), )] (%761:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=382)]) -> (%762:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=383)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=383), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=383), )] (%762:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=383)]) -> (%763:tensor<[1, 8, 128, 32], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=383)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=376), outputs_0:QuantSpec(Raw(type: Float16), uuid=384), )] (%756:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=376)]) -> (%764:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=384)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=384), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=385), )] (%764:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=384)]) -> (%765:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=385)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=12), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=383), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=12), )] (%338:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=12)], %763:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=383)]) -> (%766:tensor<[1, 8, 128, 1024], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=12)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=40), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=385), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=40), )] (%339:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=40)], %765:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=385)]) -> (%767:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=40)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=12), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=12), )] (%766:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=12)]) -> (%768:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=12)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=40), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=40), 
)] (%767:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=40)]) -> (%769:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=40)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=378), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=12), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=386), )] (%759:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=378)], %768:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=12)]) -> (%770:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=386)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=386), inputs_1:QuantSpec(Raw(type: Float32), uuid=387), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=386), )] (%770:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=386)], %771:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=387), constant:[0.088388346]]) -> (%772:tensor<[1, 16, 32, 1024], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=386)]) - linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=386), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=388), )] (%772:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=386)]) -> (%773:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=388)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=388), inputs_1:QuantSpec(Raw(type: Int16), uuid=389), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=388), )] (%773:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=388)], %774:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=389), constant:[-20]]) -> (%775:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=388)]) - linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=390), outputs_0:QuantSpec(Raw(type: UInt8), uuid=391), )] (%319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %776:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), 
uuid=390), constant:[-0.1796875]]) -> (%777:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=391)]) - linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=391), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=386), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=388), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=388), )] (%777:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=391)], %772:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=386)], %775:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=388)]) -> (%778:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=388)]) - linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=388), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=392), )] (%778:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=388)]) -> (%779:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=392)]) - linalg.CPU.MatMulOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=392), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=40), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=393), )] (%779:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=392)], %769:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=40)]) -> (%780:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=393)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=393), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=393), )] (%780:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=393)]) -> (%781:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=393)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=393), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=393), )] (%781:tensor<[1, 32, 16, 128], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=393)]) -> (%781:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=393)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=393), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=395), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=394))] (%781:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=393)]) -> (%782:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=395)]) - cf.ReturnOp (%782:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=395)], %763:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=383)], %765:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=385)]) -> () + (%8445:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=372)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8033:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=12)], %8034:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=40)]) -> (%8477:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=396)], %8458:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=384)], %8460:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=386)]) { + linalg.CPU.LinearOp (%8445:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=372)]) -> (%8446:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=378)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=372), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=375), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=374))] 
(%8445:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=372)]) -> (%8447:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=375)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=372), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=377), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=376))] (%8445:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=372)]) -> (%8448:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=377)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=378), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=378), )] (%8446:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=378)]) -> (%8446:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=378)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=378), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=378), )] (%8446:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=378)]) -> (%8449:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=378)]) 
+ linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=375), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=375), )] (%8447:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=375)]) -> (%8447:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=375)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=375), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=375), )] (%8447:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=375)]) -> (%8450:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=375)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=377), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=377), )] (%8448:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=377)]) -> (%8448:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=377)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=377), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=377), )] (%8448:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=377)]) -> (%8451:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=377)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=378), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=379), weight_weight:QuantSpec(Raw(type: Int16PerTensor), uuid=380))] (%8449:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=378)]) -> (%8452:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=379)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=375), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=381), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=382))] (%8450:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=375)]) -> (%8453:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 
-32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=381)]) + linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=379), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=379), )] (%8452:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=379)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%8454:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=379)]) + linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=381), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=381), )] (%8453:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 
-32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=381)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%8455:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=381)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=381), outputs_0:QuantSpec(Raw(type: Float16), uuid=383), )] (%8455:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=381)]) -> (%8456:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=383)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=383), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=384), )] (%8456:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=383)]) -> (%8457:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=384)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=384), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=384), )] (%8457:tensor<[1, 8, 32, 128], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=384)]) -> (%8458:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=384)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=377), outputs_0:QuantSpec(Raw(type: Float16), uuid=385), )] (%8451:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=377)]) -> (%8459:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=385)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=385), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=386), )] (%8459:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=385)]) -> (%8460:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=386)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=12), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=384), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=12), )] (%8033:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=12)], %8458:tensor<[1, 8, 128, 32], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=384)]) -> (%8461:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=12)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=40), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=386), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=40), )] (%8034:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=40)], %8460:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=386)]) -> (%8462:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=40)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=12), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=12), )] (%8461:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=12)]) -> (%8463:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=12)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, 
quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=40), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=40), )] (%8462:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=40)]) -> (%8464:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=40)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=379), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=12), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=387), )] (%8454:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=379)], %8463:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=12)]) -> (%8465:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=387)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=387), inputs_1:QuantSpec(Raw(type: Float32), uuid=388), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=387), )] (%8465:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=387)], %8466:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=388), constant:[0.088388346]]) -> (%8467:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=387)]) + linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=387), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=389), )] (%8467:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=387)]) -> (%8468:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=389)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=389), inputs_1:QuantSpec(Raw(type: Int16), uuid=390), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=389), )] (%8468:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=389)], %8469:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=390), constant:[-20]]) -> (%8470:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=389)]) + linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=391), 
outputs_0:QuantSpec(Raw(type: UInt8), uuid=392), )] (%8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8471:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=391), constant:[-0.1796875]]) -> (%8472:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=392)]) + linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=392), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=387), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=389), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=389), )] (%8472:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=392)], %8467:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=387)], %8470:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=389)]) -> (%8473:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=389)]) + linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=389), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=393), )] (%8473:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=389)]) -> (%8474:tensor<[1, 16, 32, 1024], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=393)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=393), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=40), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=394), )] (%8474:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=393)], %8464:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=40)]) -> (%8475:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=394)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=394), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=394), )] (%8475:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=394)]) -> (%8476:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=394)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=394), 
outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=394), )] (%8476:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=394)]) -> (%8476:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=394)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=394), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=396), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=395))] (%8476:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=394)]) -> (%8477:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=396)]) + cf.ReturnOp (%8477:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=396)], %8458:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=384)], %8460:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=386)]) -> () } } graph.SubGraphOp @model.layers.9.mlp [using_qnn:true, symbol:model.layers.9.mlp] { - (%784:tensor<[1, 32, 2048], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=396)]) -> (%789:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=404)]) { - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=396), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=399), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=398))] (%784:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=396)]) -> (%785:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=399)]) - linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=399), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=400), )] (%785:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=399)]) -> (%786:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=400)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=396), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=402), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=401))] (%784:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=396)]) -> (%787:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=402)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=400), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=402), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=400), )] (%786:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=400)], %787:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=402)]) -> (%788:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=400)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=400), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=404), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 
32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=403))] (%788:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=400)]) -> (%789:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=404)]) - cf.ReturnOp (%789:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=404)]) -> () + (%8479:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=397)]) -> (%8484:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=405)]) { + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=397), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=400), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=399))] (%8479:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=397)]) -> (%8480:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=400)]) + linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, 
scale_type: Float32), uuid=400), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=401), )] (%8480:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=400)]) -> (%8481:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=401)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=397), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=403), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=402))] (%8479:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=397)]) -> (%8482:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=403)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=401), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=403), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=401), )] (%8481:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=401)], %8482:tensor<[1, 32, 6144], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=403)]) -> (%8483:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=401)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=401), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=405), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=404))] (%8483:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=401)]) -> (%8484:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=405)]) + cf.ReturnOp (%8484:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=405)]) -> () } } graph.SubGraphOp @model.layers.10 [using_qnn:true, symbol:model.layers.10] { - (%790:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=404)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 
1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %340:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=13)], %341:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=41)]) -> (%831:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=438)], %804:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=417)], %806:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=419)]) { - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=404), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=405), )] (%790:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=404)]) -> (%791:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=405)]) - graph.CallGraphOp @model.layers.10.self_attn (%791:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=405)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %340:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=13)], %341:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=41)]) -> (%823:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=429)], %804:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=417)], %806:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=419)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=429), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=404), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=429), )] (%823:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=429)], %790:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=404)]) -> (%824:tensor<[1, 32, 2048], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=429)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=429), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=430), )] (%824:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=429)]) -> (%825:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=430)]) - graph.CallGraphOp @model.layers.10.mlp (%825:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=430)]) -> (%830:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=438)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=438), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=429), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=438), )] (%830:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=438)], %824:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=429)]) -> (%831:tensor<[1, 
32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=438)]) - cf.ReturnOp (%831:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=438)], %804:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=417)], %806:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=419)]) -> () + (%8485:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=405)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8035:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=13)], %8036:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=41)]) -> (%8526:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=439)], %8499:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: 
Int8, scale_type: Float32), uuid=418)], %8501:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=420)]) { + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=405), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=406), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=407))] (%8485:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=405)]) -> (%8486:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=406)]) + graph.CallGraphOp @model.layers.10.self_attn (%8486:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=406)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8035:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=13)], %8036:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, 
scale_type: Float32), uuid=41)]) -> (%8518:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=430)], %8499:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=418)], %8501:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=420)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=430), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=405), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=430), )] (%8518:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=430)], %8485:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=405)]) -> (%8519:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=430)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=430), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=431), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=432))] (%8519:tensor<[1, 32, 2048], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=430)]) -> (%8520:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=431)]) + graph.CallGraphOp @model.layers.10.mlp (%8520:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=431)]) -> (%8525:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=439)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=439), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=430), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=439), )] (%8525:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=439)], %8519:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=430)]) -> (%8526:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=439)]) + cf.ReturnOp (%8526:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=439)], %8499:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, 
quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=418)], %8501:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=420)]) -> () } } graph.SubGraphOp @model.layers.10.self_attn [using_qnn:true, symbol:model.layers.10.self_attn] { - (%791:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=405)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %340:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=13)], %341:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=41)]) -> (%823:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=429)], %804:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=417)], %806:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=419)]) { - linalg.CPU.LinearOp (%791:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=405)]) -> (%792:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=411)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=405), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=408), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=407))] (%791:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=405)]) -> (%793:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=408)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=405), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=410), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=409))] (%791:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=405)]) -> (%794:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=410)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=411), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=411), 
)] (%792:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=411)]) -> (%792:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=411)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=411), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=411), )] (%792:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=411)]) -> (%795:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=411)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=408), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=408), )] (%793:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=408)]) -> (%793:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=408)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=408), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=408), )] (%793:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=408)]) -> (%796:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=408)]) - linalg.CPU.ViewOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=410), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=410), )] (%794:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=410)]) -> (%794:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=410)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=410), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=410), )] (%794:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=410)]) -> (%797:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=410)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=411), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=412), )] (%795:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=411)]) -> (%798:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=412)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, 
scale_type: Float32), uuid=408), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=414), )] (%796:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=408)]) -> (%799:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=414)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=412), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=412), )] (%798:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=412)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%800:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=412)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=414), 
inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=414), )] (%799:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=414)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%801:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=414)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=414), outputs_0:QuantSpec(Raw(type: Float16), uuid=416), )] (%801:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=414)]) -> (%802:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=416)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=416), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=417), )] (%802:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=416)]) -> (%803:tensor<[1, 8, 32, 128], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=417)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=417), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=417), )] (%803:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=417)]) -> (%804:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=417)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=410), outputs_0:QuantSpec(Raw(type: Float16), uuid=418), )] (%797:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=410)]) -> (%805:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=418)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=418), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=419), )] (%805:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=418)]) -> (%806:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=419)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=13), 
inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=417), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=13), )] (%340:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=13)], %804:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=417)]) -> (%807:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=13)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=41), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=419), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=41), )] (%341:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=41)], %806:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=419)]) -> (%808:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=41)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=13), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=13), )] 
(%807:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=13)]) -> (%809:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=13)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=41), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=41), )] (%808:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=41)]) -> (%810:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=41)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=412), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=13), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=420), )] (%800:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=412)], %809:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=13)]) -> (%811:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=420)]) - linalg.CPU.MulOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=420), inputs_1:QuantSpec(Raw(type: Float32), uuid=421), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=420), )] (%811:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=420)], %812:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=421), constant:[0.088388346]]) -> (%813:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=420)]) - linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=420), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=422), )] (%813:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=420)]) -> (%814:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=422)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=422), inputs_1:QuantSpec(Raw(type: Int16), uuid=423), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=422), )] (%814:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=422)], %815:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=423), constant:[-20]]) -> (%816:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=422)]) - linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=424), outputs_0:QuantSpec(Raw(type: UInt8), uuid=425), )] (%319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %817:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=424), constant:[-0.93359375]]) -> (%818:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=425)]) - linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=425), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=420), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=422), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=422), )] (%818:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=425)], %813:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=420)], %816:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=422)]) -> (%819:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=422)]) - linalg.CPU.SoftmaxOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=422), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=426), )] (%819:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=422)]) -> (%820:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=426)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=426), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=41), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=427), )] (%820:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=426)], %810:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=41)]) -> (%821:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=427)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=427), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=427), )] (%821:tensor<[1, 16, 32, 128], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=427)]) -> (%822:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=427)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=427), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=427), )] (%822:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=427)]) -> (%822:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=427)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=427), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=429), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=428))] (%822:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=427)]) -> (%823:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=429)]) - cf.ReturnOp (%823:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=429)], %804:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=417)], %806:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=419)]) -> () + (%8486:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=406)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8035:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=13)], %8036:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=41)]) -> (%8518:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=430)], %8499:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=418)], %8501:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=420)]) { + linalg.CPU.LinearOp (%8486:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=406)]) -> (%8487:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=412)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=406), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=409), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=408))] (%8486:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=406)]) -> (%8488:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=409)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=406), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=411), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=410))] (%8486:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=406)]) -> (%8489:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=411)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=412), outputs_0:QuantSpec(Raw(type: 
Int16PerTensor), uuid=412), )] (%8487:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=412)]) -> (%8487:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=412)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=412), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=412), )] (%8487:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=412)]) -> (%8490:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=412)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=409), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=409), )] (%8488:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=409)]) -> (%8488:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=409)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=409), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=409), )] (%8488:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=409)]) -> (%8491:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=409)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=411), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=411), )] (%8489:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=411)]) -> (%8489:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=411)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=411), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=411), )] (%8489:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=411)]) -> (%8492:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=411)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=412), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=413), weight_weight:QuantSpec(Raw(type: Int16PerTensor), uuid=414))] (%8490:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=412)]) -> (%8493:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=413)]) + linalg.CPU.RMSNormOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=409), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=415), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=416))] (%8491:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=409)]) -> (%8494:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=415)]) + linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=413), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=413), )] (%8493:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=413)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%8495:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: 
Int16, scale_type: Float32), uuid=413)]) + linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=415), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=415), )] (%8494:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=415)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%8496:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=415)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=415), outputs_0:QuantSpec(Raw(type: Float16), uuid=417), )] (%8496:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=415)]) -> (%8497:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=417)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=417), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, 
quant_to_type: Int8, scale_type: Float32), uuid=418), )] (%8497:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=417)]) -> (%8498:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=418)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=418), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=418), )] (%8498:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=418)]) -> (%8499:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=418)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=411), outputs_0:QuantSpec(Raw(type: Float16), uuid=419), )] (%8492:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=411)]) -> (%8500:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=419)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=419), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=420), )] (%8500:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=419)]) -> (%8501:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), 
uuid=420)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=13), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=418), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=13), )] (%8035:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=13)], %8499:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=418)]) -> (%8502:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=13)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=41), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=420), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=41), )] (%8036:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=41)], %8501:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=420)]) -> (%8503:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=41)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, 
quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=13), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=13), )] (%8502:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=13)]) -> (%8504:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=13)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=41), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=41), )] (%8503:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=41)]) -> (%8505:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=41)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=413), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=13), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=421), )] (%8495:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=413)], %8504:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=13)]) -> (%8506:tensor<[1, 16, 
32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=421)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=421), inputs_1:QuantSpec(Raw(type: Float32), uuid=422), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=421), )] (%8506:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=421)], %8507:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=422), constant:[0.088388346]]) -> (%8508:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=421)]) + linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=421), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=423), )] (%8508:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=421)]) -> (%8509:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=423)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=423), inputs_1:QuantSpec(Raw(type: Int16), uuid=424), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=423), )] (%8509:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=423)], %8510:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=424), constant:[-20]]) -> (%8511:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=423)]) + linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=425), outputs_0:QuantSpec(Raw(type: UInt8), uuid=426), )] (%8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8512:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=425), constant:[-0.93359375]]) -> (%8513:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=426)]) + linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=426), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=421), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=423), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=423), )] (%8513:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=426)], %8508:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=421)], %8511:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=423)]) -> (%8514:tensor<[1, 16, 32, 1024], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=423)]) + linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=423), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=427), )] (%8514:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=423)]) -> (%8515:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=427)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=427), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=41), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=428), )] (%8515:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=427)], %8505:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=41)]) -> (%8516:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=428)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=428), outputs_0:QuantSpec(SymPerTensor(quant_min: 
-32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=428), )] (%8516:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=428)]) -> (%8517:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=428)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=428), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=428), )] (%8517:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=428)]) -> (%8517:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=428)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=428), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=430), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=429))] (%8517:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=428)]) -> (%8518:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=430)]) + cf.ReturnOp (%8518:tensor<[1, 32, 2048], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=430)], %8499:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=418)], %8501:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=420)]) -> () } } graph.SubGraphOp @model.layers.10.mlp [using_qnn:true, symbol:model.layers.10.mlp] { - (%825:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=430)]) -> (%830:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=438)]) { - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=430), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=433), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=432))] (%825:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=430)]) -> (%826:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=433)]) - linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=433), 
outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=434), )] (%826:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=433)]) -> (%827:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=434)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=430), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=436), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=435))] (%825:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=430)]) -> (%828:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=436)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=434), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=436), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=434), )] (%827:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=434)], %828:tensor<[1, 32, 6144], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=436)]) -> (%829:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=434)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=434), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=438), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=437))] (%829:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=434)]) -> (%830:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=438)]) - cf.ReturnOp (%830:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=438)]) -> () + (%8520:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=431)]) -> (%8525:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=439)]) { + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=431), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=434), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=433))] (%8520:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=431)]) -> (%8521:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=434)]) + linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=434), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=435), )] (%8521:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=434)]) -> (%8522:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=435)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=431), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=437), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=436))] (%8520:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=431)]) -> (%8523:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: 
Int16, scale_type: Float32), uuid=437)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=435), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=437), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=435), )] (%8522:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=435)], %8523:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=437)]) -> (%8524:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=435)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=435), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=439), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=438))] (%8524:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=435)]) -> (%8525:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=439)]) + cf.ReturnOp (%8525:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, 
scale_type: Float32), uuid=439)]) -> () } } graph.SubGraphOp @model.layers.11 [using_qnn:true, symbol:model.layers.11] { - (%831:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=438)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %342:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=14)], %343:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=42)]) -> (%872:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=472)], %845:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=451)], %847:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=453)]) { - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=438), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=439), )] (%831:tensor<[1, 32, 2048], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=438)]) -> (%832:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=439)]) - graph.CallGraphOp @model.layers.11.self_attn (%832:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=439)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %342:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=14)], %343:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=42)]) -> (%864:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=463)], %845:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=451)], %847:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=453)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, 
scale_type: Float32), uuid=463), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=438), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=463), )] (%864:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=463)], %831:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=438)]) -> (%865:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=463)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=463), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=464), )] (%865:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=463)]) -> (%866:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=464)]) - graph.CallGraphOp @model.layers.11.mlp (%866:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=464)]) -> (%871:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=472)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 
32767, quant_to_type: Int16, scale_type: Float32), uuid=472), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=463), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=472), )] (%871:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=472)], %865:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=463)]) -> (%872:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=472)]) - cf.ReturnOp (%872:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=472)], %845:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=451)], %847:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=453)]) -> () + (%8526:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=439)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, 
CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8037:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=14)], %8038:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=42)]) -> (%8567:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=473)], %8540:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=452)], %8542:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=454)]) { + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=439), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=440), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=441))] (%8526:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=439)]) -> (%8527:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=440)]) + graph.CallGraphOp @model.layers.11.self_attn (%8527:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=440)], %8074:tensor<[1, 32, 128], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8037:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=14)], %8038:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=42)]) -> (%8559:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=464)], %8540:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=452)], %8542:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=454)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=464), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=439), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=464), )] (%8559:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=464)], %8526:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=439)]) -> (%8560:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=464)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=464), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=465), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=466))] (%8560:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=464)]) -> (%8561:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=465)]) + graph.CallGraphOp @model.layers.11.mlp (%8561:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=465)]) -> (%8566:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=473)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=473), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=464), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=473), )] (%8566:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=473)], %8560:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=464)]) -> (%8567:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=473)]) + cf.ReturnOp (%8567:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=473)], %8540:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=452)], %8542:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=454)]) -> () } } graph.SubGraphOp @model.layers.11.self_attn [using_qnn:true, symbol:model.layers.11.self_attn] { - (%832:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=439)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %342:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=14)], %343:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: 
Int8, scale_type: Float32), uuid=42)]) -> (%864:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=463)], %845:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=451)], %847:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=453)]) { - linalg.CPU.LinearOp (%832:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=439)]) -> (%833:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=445)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=439), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=442), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=441))] (%832:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=439)]) -> (%834:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=442)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=439), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: 
Float32), uuid=444), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=443))] (%832:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=439)]) -> (%835:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=444)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=445), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=445), )] (%833:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=445)]) -> (%833:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=445)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=445), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=445), )] (%833:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=445)]) -> (%836:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=445)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=442), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=442), )] (%834:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=442)]) -> (%834:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: 
Float32), uuid=442)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=442), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=442), )] (%834:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=442)]) -> (%837:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=442)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=444), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=444), )] (%835:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=444)]) -> (%835:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=444)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=444), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=444), )] (%835:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=444)]) -> (%838:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, 
scale_type: Float32), uuid=444)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=445), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=446), )] (%836:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=445)]) -> (%839:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=446)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=442), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=448), )] (%837:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=442)]) -> (%840:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=448)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=446), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=446), )] (%839:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=446)], %379:tensor<[1, 32, 128], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%841:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=446)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=448), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=448), )] (%840:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=448)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%842:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=448)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=448), outputs_0:QuantSpec(Raw(type: Float16), uuid=450), 
)] (%842:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=448)]) -> (%843:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=450)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=450), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=451), )] (%843:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=450)]) -> (%844:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=451)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=451), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=451), )] (%844:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=451)]) -> (%845:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=451)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=444), outputs_0:QuantSpec(Raw(type: Float16), uuid=452), )] (%838:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=444)]) -> (%846:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=452)]) - linalg.CPU.CastTypeOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=452), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=453), )] (%846:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=452)]) -> (%847:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=453)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=14), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=451), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=14), )] (%342:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=14)], %845:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=451)]) -> (%848:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=14)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=42), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=453), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=42), )] (%343:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=42)], 
%847:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=453)]) -> (%849:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=42)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=14), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=14), )] (%848:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=14)]) -> (%850:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=14)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=42), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=42), )] (%849:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=42)]) -> (%851:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=42)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=446), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=14), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 
32767, quant_to_type: Int16, scale_type: Float32), uuid=454), )] (%841:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=446)], %850:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=14)]) -> (%852:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=454)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=454), inputs_1:QuantSpec(Raw(type: Float32), uuid=455), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=454), )] (%852:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=454)], %853:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=455), constant:[0.088388346]]) -> (%854:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=454)]) - linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=454), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=456), )] (%854:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=454)]) -> (%855:tensor<[1, 16, 32, 1], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=456)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=456), inputs_1:QuantSpec(Raw(type: Int16), uuid=457), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=456), )] (%855:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=456)], %856:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=457), constant:[-20]]) -> (%857:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=456)]) - linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=458), outputs_0:QuantSpec(Raw(type: UInt8), uuid=459), )] (%319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %858:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=458), constant:[0.515625]]) -> (%859:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=459)]) - linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=459), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=454), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=456), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=456), )] (%859:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), 
uuid=459)], %854:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=454)], %857:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=456)]) -> (%860:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=456)]) - linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=456), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=460), )] (%860:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=456)]) -> (%861:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=460)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=460), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=42), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=461), )] (%861:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=460)], %851:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), 
uuid=42)]) -> (%862:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=461)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=461), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=461), )] (%862:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=461)]) -> (%863:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=461)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=461), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=461), )] (%863:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=461)]) -> (%863:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=461)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=461), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=463), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), 
uuid=462))] (%863:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=461)]) -> (%864:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=463)]) - cf.ReturnOp (%864:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=463)], %845:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=451)], %847:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=453)]) -> () + (%8527:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=440)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8037:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=14)], %8038:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=42)]) -> (%8559:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=464)], %8540:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=452)], %8542:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=454)]) { + linalg.CPU.LinearOp (%8527:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=440)]) -> (%8528:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=446)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=440), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=443), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=442))] (%8527:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=440)]) -> (%8529:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=443)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=440), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=445), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, 
quant_to_type: UInt4, scale_1_type: Float32), uuid=444))] (%8527:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=440)]) -> (%8530:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=445)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=446), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=446), )] (%8528:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=446)]) -> (%8528:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=446)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=446), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=446), )] (%8528:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=446)]) -> (%8531:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=446)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=443), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=443), )] (%8529:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=443)]) -> (%8529:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=443)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=443), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=443), )] (%8529:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=443)]) -> (%8532:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=443)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=445), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=445), )] (%8530:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=445)]) -> (%8530:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=445)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=445), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=445), )] (%8530:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=445)]) -> (%8533:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=445)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: 
Int16PerTensor), uuid=446), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=447), weight_weight:QuantSpec(Raw(type: Int16PerTensor), uuid=448))] (%8531:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=446)]) -> (%8534:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=447)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=443), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=449), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=450))] (%8532:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=443)]) -> (%8535:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=449)]) + linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=447), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=447), )] (%8534:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=447)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%8536:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=447)]) + linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=449), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=449), )] (%8535:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=449)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%8537:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=449)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: 
Int16, scale_type: Float32), uuid=449), outputs_0:QuantSpec(Raw(type: Float16), uuid=451), )] (%8537:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=449)]) -> (%8538:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=451)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=451), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=452), )] (%8538:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=451)]) -> (%8539:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=452)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=452), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=452), )] (%8539:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=452)]) -> (%8540:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=452)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=445), outputs_0:QuantSpec(Raw(type: Float16), uuid=453), )] (%8533:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=445)]) -> (%8541:tensor<[1, 8, 32, 128], Float16, 
CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=453)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=453), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=454), )] (%8541:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=453)]) -> (%8542:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=454)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=14), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=452), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=14), )] (%8037:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=14)], %8540:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=452)]) -> (%8543:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=14)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=42), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=454), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=42), )] (%8038:tensor<[1, 8, 992, 128], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=42)], %8542:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=454)]) -> (%8544:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=42)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=14), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=14), )] (%8543:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=14)]) -> (%8545:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=14)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=42), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=42), )] (%8544:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=42)]) -> (%8546:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=42)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=447), inputs_1:QuantSpec(SymPerTensor(quant_min: 
-128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=14), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=455), )] (%8536:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=447)], %8545:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=14)]) -> (%8547:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=455)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=455), inputs_1:QuantSpec(Raw(type: Float32), uuid=456), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=455), )] (%8547:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=455)], %8548:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=456), constant:[0.088388346]]) -> (%8549:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=455)]) + linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=455), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=457), )] (%8549:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=455)]) -> (%8550:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=457)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=457), inputs_1:QuantSpec(Raw(type: Int16), uuid=458), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=457), )] (%8550:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=457)], %8551:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=458), constant:[-20]]) -> (%8552:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=457)]) + linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=459), outputs_0:QuantSpec(Raw(type: UInt8), uuid=460), )] (%8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8553:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=459), constant:[0.515625]]) -> (%8554:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=460)]) + linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=460), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=455), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=457), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=457), )] (%8554:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=460)], %8549:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=455)], %8552:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=457)]) -> (%8555:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=457)]) + linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=457), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=461), )] (%8555:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=457)]) -> (%8556:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=461)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=461), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=42), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=462), )] (%8556:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=461)], %8546:tensor<[1, 
16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=42)]) -> (%8557:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=462)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=462), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=462), )] (%8557:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=462)]) -> (%8558:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=462)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=462), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=462), )] (%8558:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=462)]) -> (%8558:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=462)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=462), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=464), 
weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=463))] (%8558:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=462)]) -> (%8559:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=464)]) + cf.ReturnOp (%8559:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=464)], %8540:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=452)], %8542:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=454)]) -> () } } graph.SubGraphOp @model.layers.11.mlp [using_qnn:true, symbol:model.layers.11.mlp] { - (%866:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=464)]) -> (%871:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=472)]) { - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=464), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=467), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), 
uuid=466))] (%866:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=464)]) -> (%867:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=467)]) - linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=467), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=468), )] (%867:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=467)]) -> (%868:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=468)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=464), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=470), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=469))] (%866:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=464)]) -> (%869:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=470)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: 
Int16, scale_type: Float32), uuid=468), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=470), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=468), )] (%868:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=468)], %869:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=470)]) -> (%870:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=468)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=468), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=472), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=471))] (%870:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=468)]) -> (%871:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=472)]) - cf.ReturnOp (%871:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=472)]) -> () + (%8561:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: 
Int16, scale_type: Float32), uuid=465)]) -> (%8566:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=473)]) { + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=465), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=468), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=467))] (%8561:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=465)]) -> (%8562:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=468)]) + linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=468), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=469), )] (%8562:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=468)]) -> (%8563:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=469)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=465), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, 
scale_type: Float32), uuid=471), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=470))] (%8561:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=465)]) -> (%8564:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=471)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=469), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=471), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=469), )] (%8563:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=469)], %8564:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=471)]) -> (%8565:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=469)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=469), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=473), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), 
uuid=472))] (%8565:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=469)]) -> (%8566:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=473)]) + cf.ReturnOp (%8566:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=473)]) -> () } } graph.SubGraphOp @model.layers.12 [using_qnn:true, symbol:model.layers.12] { - (%872:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=472)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %344:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=15)], %345:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=43)]) -> (%913:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=506)], %886:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=485)], %888:tensor<[1, 8, 
32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=487)]) { - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=472), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=473), )] (%872:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=472)]) -> (%873:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=473)]) - graph.CallGraphOp @model.layers.12.self_attn (%873:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=473)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %344:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=15)], %345:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=43)]) -> (%905:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: 
Float32), uuid=497)], %886:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=485)], %888:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=487)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=497), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=472), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=497), )] (%905:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=497)], %872:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=472)]) -> (%906:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=497)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=497), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=498), )] (%906:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=497)]) -> (%907:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=498)]) - 
graph.CallGraphOp @model.layers.12.mlp (%907:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=498)]) -> (%912:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=506)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=506), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=497), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=506), )] (%912:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=506)], %906:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=497)]) -> (%913:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=506)]) - cf.ReturnOp (%913:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=506)], %886:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=485)], %888:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=487)]) -> () + (%8567:tensor<[1, 32, 2048], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=473)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8039:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=15)], %8040:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=43)]) -> (%8608:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=507)], %8581:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=486)], %8583:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=488)]) { + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=473), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=474), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=475))] (%8567:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=473)]) -> (%8568:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=474)]) + graph.CallGraphOp @model.layers.12.self_attn (%8568:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=474)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8039:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=15)], %8040:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=43)]) -> (%8600:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=498)], %8581:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=486)], %8583:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=488)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=498), 
inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=473), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=498), )] (%8600:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=498)], %8567:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=473)]) -> (%8601:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=498)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=498), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=499), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=500))] (%8601:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=498)]) -> (%8602:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=499)]) + graph.CallGraphOp @model.layers.12.mlp (%8602:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=499)]) -> (%8607:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=507)]) + 
linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=507), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=498), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=507), )] (%8607:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=507)], %8601:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=498)]) -> (%8608:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=507)]) + cf.ReturnOp (%8608:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=507)], %8581:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=486)], %8583:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=488)]) -> () } } graph.SubGraphOp @model.layers.12.self_attn [using_qnn:true, symbol:model.layers.12.self_attn] { - (%873:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=473)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %344:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=15)], %345:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=43)]) -> (%905:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=497)], %886:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=485)], %888:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=487)]) { - linalg.CPU.LinearOp (%873:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=473)]) -> (%874:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=479)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=473), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=476), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=475))] (%873:tensor<[1, 32, 2048], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=473)]) -> (%875:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=476)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=473), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=478), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=477))] (%873:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=473)]) -> (%876:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=478)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=479), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=479), )] (%874:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=479)]) -> (%874:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=479)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=479), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=479), )] (%874:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=479)]) -> (%877:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=479)]) - linalg.CPU.ViewOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=476), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=476), )] (%875:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=476)]) -> (%875:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=476)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=476), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=476), )] (%875:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=476)]) -> (%878:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=476)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=478), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=478), )] (%876:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=478)]) -> (%876:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=478)]) - 
linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=478), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=478), )] (%876:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=478)]) -> (%879:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=478)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=479), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=480), )] (%877:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=479)]) -> (%880:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=480)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=476), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=482), )] (%878:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=476)]) -> (%881:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=482)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=480), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=480), )] (%880:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=480)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%882:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=480)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=482), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=482), )] (%881:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=482)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%883:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=482)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=482), outputs_0:QuantSpec(Raw(type: Float16), uuid=484), )] (%883:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=482)]) -> (%884:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=484)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=484), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=485), )] (%884:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=484)]) -> (%885:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=485)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=485), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=485), )] (%885:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=485)]) -> (%886:tensor<[1, 8, 128, 32], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=485)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=478), outputs_0:QuantSpec(Raw(type: Float16), uuid=486), )] (%879:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=478)]) -> (%887:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=486)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=486), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=487), )] (%887:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=486)]) -> (%888:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=487)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=15), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=485), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=15), )] (%344:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=15)], %886:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=485)]) -> (%889:tensor<[1, 8, 128, 1024], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=15)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=43), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=487), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=43), )] (%345:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=43)], %888:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=487)]) -> (%890:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=43)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=15), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=15), )] (%889:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=15)]) -> (%891:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=15)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=43), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=43), 
)] (%890:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=43)]) -> (%892:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=43)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=480), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=15), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=488), )] (%882:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=480)], %891:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=15)]) -> (%893:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=488)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=488), inputs_1:QuantSpec(Raw(type: Float32), uuid=489), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=488), )] (%893:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=488)], %894:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=489), constant:[0.088388346]]) -> (%895:tensor<[1, 16, 32, 1024], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=488)]) - linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=488), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=490), )] (%895:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=488)]) -> (%896:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=490)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=490), inputs_1:QuantSpec(Raw(type: Int16), uuid=491), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=490), )] (%896:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=490)], %897:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=491), constant:[-20]]) -> (%898:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=490)]) - linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=492), outputs_0:QuantSpec(Raw(type: UInt8), uuid=493), )] (%319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %899:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), 
uuid=492), constant:[0.74609375]]) -> (%900:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=493)]) - linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=493), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=488), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=490), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=490), )] (%900:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=493)], %895:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=488)], %898:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=490)]) -> (%901:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=490)]) - linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=490), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=494), )] (%901:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=490)]) -> (%902:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=494)]) - linalg.CPU.MatMulOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=494), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=43), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=495), )] (%902:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=494)], %892:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=43)]) -> (%903:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=495)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=495), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=495), )] (%903:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=495)]) -> (%904:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=495)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=495), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=495), )] (%904:tensor<[1, 32, 16, 128], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=495)]) -> (%904:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=495)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=495), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=497), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=496))] (%904:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=495)]) -> (%905:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=497)]) - cf.ReturnOp (%905:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=497)], %886:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=485)], %888:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=487)]) -> () + (%8568:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=474)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8039:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=15)], %8040:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=43)]) -> (%8600:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=498)], %8581:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=486)], %8583:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=488)]) { + linalg.CPU.LinearOp (%8568:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=474)]) -> (%8569:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=480)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=474), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=477), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=476))] 
(%8568:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=474)]) -> (%8570:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=477)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=474), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=479), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=478))] (%8568:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=474)]) -> (%8571:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=479)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=480), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=480), )] (%8569:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=480)]) -> (%8569:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=480)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=480), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=480), )] (%8569:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=480)]) -> (%8572:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=480)]) 
+ linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=477), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=477), )] (%8570:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=477)]) -> (%8570:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=477)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=477), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=477), )] (%8570:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=477)]) -> (%8573:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=477)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=479), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=479), )] (%8571:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=479)]) -> (%8571:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=479)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=479), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=479), )] (%8571:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=479)]) -> (%8574:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=479)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=480), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=481), weight_weight:QuantSpec(Raw(type: Int16PerTensor), uuid=482))] (%8572:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=480)]) -> (%8575:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=481)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=477), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=483), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=484))] (%8573:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=477)]) -> (%8576:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 
-32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=483)]) + linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=481), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=481), )] (%8575:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=481)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%8577:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=481)]) + linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=483), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=483), )] (%8576:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 
-32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=483)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%8578:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=483)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=483), outputs_0:QuantSpec(Raw(type: Float16), uuid=485), )] (%8578:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=483)]) -> (%8579:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=485)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=485), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=486), )] (%8579:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=485)]) -> (%8580:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=486)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=486), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=486), )] (%8580:tensor<[1, 8, 32, 128], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=486)]) -> (%8581:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=486)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=479), outputs_0:QuantSpec(Raw(type: Float16), uuid=487), )] (%8574:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=479)]) -> (%8582:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=487)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=487), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=488), )] (%8582:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=487)]) -> (%8583:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=488)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=15), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=486), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=15), )] (%8039:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=15)], %8581:tensor<[1, 8, 128, 32], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=486)]) -> (%8584:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=15)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=43), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=488), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=43), )] (%8040:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=43)], %8583:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=488)]) -> (%8585:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=43)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=15), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=15), )] (%8584:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=15)]) -> (%8586:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=15)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, 
quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=43), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=43), )] (%8585:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=43)]) -> (%8587:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=43)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=481), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=15), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=489), )] (%8577:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=481)], %8586:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=15)]) -> (%8588:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=489)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=489), inputs_1:QuantSpec(Raw(type: Float32), uuid=490), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=489), )] (%8588:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=489)], %8589:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=490), constant:[0.088388346]]) -> (%8590:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=489)]) + linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=489), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=491), )] (%8590:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=489)]) -> (%8591:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=491)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=491), inputs_1:QuantSpec(Raw(type: Int16), uuid=492), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=491), )] (%8591:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=491)], %8592:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=492), constant:[-20]]) -> (%8593:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=491)]) + linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=493), 
outputs_0:QuantSpec(Raw(type: UInt8), uuid=494), )] (%8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8594:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=493), constant:[0.74609375]]) -> (%8595:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=494)]) + linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=494), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=489), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=491), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=491), )] (%8595:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=494)], %8590:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=489)], %8593:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=491)]) -> (%8596:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=491)]) + linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=491), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=495), )] (%8596:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=491)]) -> (%8597:tensor<[1, 16, 32, 1024], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=495)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=495), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=43), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=496), )] (%8597:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=495)], %8587:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=43)]) -> (%8598:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=496)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=496), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=496), )] (%8598:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=496)]) -> (%8599:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=496)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=496), 
outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=496), )] (%8599:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=496)]) -> (%8599:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=496)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=496), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=498), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=497))] (%8599:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=496)]) -> (%8600:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=498)]) + cf.ReturnOp (%8600:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=498)], %8581:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=486)], %8583:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=488)]) -> () } } graph.SubGraphOp @model.layers.12.mlp [using_qnn:true, symbol:model.layers.12.mlp] { - (%907:tensor<[1, 32, 2048], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=498)]) -> (%912:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=506)]) { - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=498), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=501), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=500))] (%907:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=498)]) -> (%908:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=501)]) - linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=501), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=502), )] (%908:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=501)]) -> (%909:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=502)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=498), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=504), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=503))] (%907:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=498)]) -> (%910:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=504)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=502), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=504), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=502), )] (%909:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=502)], %910:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=504)]) -> (%911:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=502)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=502), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=506), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 
32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=505))] (%911:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=502)]) -> (%912:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=506)]) - cf.ReturnOp (%912:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=506)]) -> () + (%8602:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=499)]) -> (%8607:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=507)]) { + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=499), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=502), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=501))] (%8602:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=499)]) -> (%8603:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=502)]) + linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, 
scale_type: Float32), uuid=502), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=503), )] (%8603:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=502)]) -> (%8604:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=503)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=499), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=505), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=504))] (%8602:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=499)]) -> (%8605:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=505)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=503), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=505), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=503), )] (%8604:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=503)], %8605:tensor<[1, 32, 6144], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=505)]) -> (%8606:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=503)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=503), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=507), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=506))] (%8606:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=503)]) -> (%8607:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=507)]) + cf.ReturnOp (%8607:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=507)]) -> () } } graph.SubGraphOp @model.layers.13 [using_qnn:true, symbol:model.layers.13] { - (%913:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=506)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 
1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %346:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=16)], %347:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=44)]) -> (%954:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=540)], %927:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=519)], %929:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=521)]) { - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=506), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=507), )] (%913:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=506)]) -> (%914:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=507)]) - graph.CallGraphOp @model.layers.13.self_attn (%914:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=507)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %346:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=16)], %347:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=44)]) -> (%946:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=531)], %927:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=519)], %929:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=521)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=531), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=506), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=531), )] (%946:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=531)], %913:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=506)]) -> (%947:tensor<[1, 32, 2048], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=531)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=531), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=532), )] (%947:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=531)]) -> (%948:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=532)]) - graph.CallGraphOp @model.layers.13.mlp (%948:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=532)]) -> (%953:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=540)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=540), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=531), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=540), )] (%953:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=540)], %947:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=531)]) -> (%954:tensor<[1, 
32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=540)]) - cf.ReturnOp (%954:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=540)], %927:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=519)], %929:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=521)]) -> () + (%8608:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=507)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8041:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=16)], %8042:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=44)]) -> (%8649:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=541)], %8622:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: 
Int8, scale_type: Float32), uuid=520)], %8624:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=522)]) { + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=507), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=508), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=509))] (%8608:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=507)]) -> (%8609:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=508)]) + graph.CallGraphOp @model.layers.13.self_attn (%8609:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=508)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8041:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=16)], %8042:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, 
scale_type: Float32), uuid=44)]) -> (%8641:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=532)], %8622:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=520)], %8624:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=522)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=532), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=507), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=532), )] (%8641:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=532)], %8608:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=507)]) -> (%8642:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=532)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=532), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=533), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=534))] (%8642:tensor<[1, 32, 2048], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=532)]) -> (%8643:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=533)]) + graph.CallGraphOp @model.layers.13.mlp (%8643:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=533)]) -> (%8648:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=541)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=541), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=532), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=541), )] (%8648:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=541)], %8642:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=532)]) -> (%8649:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=541)]) + cf.ReturnOp (%8649:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=541)], %8622:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, 
quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=520)], %8624:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=522)]) -> () } } graph.SubGraphOp @model.layers.13.self_attn [using_qnn:true, symbol:model.layers.13.self_attn] { - (%914:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=507)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %346:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=16)], %347:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=44)]) -> (%946:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=531)], %927:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=519)], %929:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=521)]) { - linalg.CPU.LinearOp (%914:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=507)]) -> (%915:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=513)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=507), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=510), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=509))] (%914:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=507)]) -> (%916:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=510)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=507), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=512), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=511))] (%914:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=507)]) -> (%917:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=512)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=513), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=513), 
)] (%915:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=513)]) -> (%915:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=513)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=513), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=513), )] (%915:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=513)]) -> (%918:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=513)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=510), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=510), )] (%916:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=510)]) -> (%916:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=510)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=510), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=510), )] (%916:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=510)]) -> (%919:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=510)]) - linalg.CPU.ViewOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=512), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=512), )] (%917:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=512)]) -> (%917:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=512)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=512), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=512), )] (%917:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=512)]) -> (%920:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=512)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=513), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=514), )] (%918:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=513)]) -> (%921:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=514)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, 
scale_type: Float32), uuid=510), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=516), )] (%919:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=510)]) -> (%922:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=516)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=514), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=514), )] (%921:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=514)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%923:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=514)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=516), 
inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=516), )] (%922:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=516)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%924:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=516)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=516), outputs_0:QuantSpec(Raw(type: Float16), uuid=518), )] (%924:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=516)]) -> (%925:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=518)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=518), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=519), )] (%925:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=518)]) -> (%926:tensor<[1, 8, 32, 128], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=519)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=519), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=519), )] (%926:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=519)]) -> (%927:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=519)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=512), outputs_0:QuantSpec(Raw(type: Float16), uuid=520), )] (%920:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=512)]) -> (%928:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=520)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=520), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=521), )] (%928:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=520)]) -> (%929:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=521)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=16), 
inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=519), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=16), )] (%346:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=16)], %927:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=519)]) -> (%930:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=16)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=44), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=521), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=44), )] (%347:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=44)], %929:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=521)]) -> (%931:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=44)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=16), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=16), )] 
(%930:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=16)]) -> (%932:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=16)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=44), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=44), )] (%931:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=44)]) -> (%933:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=44)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=514), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=16), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=522), )] (%923:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=514)], %932:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=16)]) -> (%934:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=522)]) - linalg.CPU.MulOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=522), inputs_1:QuantSpec(Raw(type: Float32), uuid=523), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=522), )] (%934:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=522)], %935:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=523), constant:[0.088388346]]) -> (%936:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=522)]) - linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=522), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=524), )] (%936:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=522)]) -> (%937:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=524)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=524), inputs_1:QuantSpec(Raw(type: Int16), uuid=525), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=524), )] (%937:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=524)], %938:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=525), constant:[-20]]) -> (%939:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=524)]) - linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=526), outputs_0:QuantSpec(Raw(type: UInt8), uuid=527), )] (%319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %940:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=526), constant:[-0.78515625]]) -> (%941:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=527)]) - linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=527), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=522), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=524), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=524), )] (%941:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=527)], %936:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=522)], %939:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=524)]) -> (%942:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=524)]) - linalg.CPU.SoftmaxOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=524), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=528), )] (%942:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=524)]) -> (%943:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=528)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=528), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=44), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=529), )] (%943:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=528)], %933:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=44)]) -> (%944:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=529)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=529), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=529), )] (%944:tensor<[1, 16, 32, 128], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=529)]) -> (%945:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=529)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=529), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=529), )] (%945:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=529)]) -> (%945:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=529)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=529), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=531), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=530))] (%945:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=529)]) -> (%946:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=531)]) - cf.ReturnOp (%946:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=531)], %927:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=519)], %929:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=521)]) -> () + (%8609:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=508)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8041:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=16)], %8042:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=44)]) -> (%8641:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=532)], %8622:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=520)], %8624:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=522)]) { + linalg.CPU.LinearOp (%8609:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=508)]) -> (%8610:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=514)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=508), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=511), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=510))] (%8609:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=508)]) -> (%8611:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=511)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=508), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=513), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=512))] (%8609:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=508)]) -> (%8612:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=513)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=514), outputs_0:QuantSpec(Raw(type: 
Int16PerTensor), uuid=514), )] (%8610:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=514)]) -> (%8610:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=514)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=514), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=514), )] (%8610:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=514)]) -> (%8613:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=514)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=511), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=511), )] (%8611:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=511)]) -> (%8611:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=511)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=511), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=511), )] (%8611:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=511)]) -> (%8614:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=511)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=513), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=513), )] (%8612:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=513)]) -> (%8612:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=513)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=513), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=513), )] (%8612:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=513)]) -> (%8615:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=513)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=514), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=515), weight_weight:QuantSpec(Raw(type: Int16PerTensor), uuid=516))] (%8613:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=514)]) -> (%8616:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=515)]) + linalg.CPU.RMSNormOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=511), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=517), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=518))] (%8614:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=511)]) -> (%8617:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=517)]) + linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=515), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=515), )] (%8616:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=515)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%8618:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: 
Int16, scale_type: Float32), uuid=515)]) + linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=517), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=517), )] (%8617:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=517)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%8619:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=517)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=517), outputs_0:QuantSpec(Raw(type: Float16), uuid=519), )] (%8619:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=517)]) -> (%8620:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=519)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=519), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, 
quant_to_type: Int8, scale_type: Float32), uuid=520), )] (%8620:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=519)]) -> (%8621:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=520)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=520), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=520), )] (%8621:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=520)]) -> (%8622:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=520)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=513), outputs_0:QuantSpec(Raw(type: Float16), uuid=521), )] (%8615:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=513)]) -> (%8623:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=521)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=521), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=522), )] (%8623:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=521)]) -> (%8624:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), 
uuid=522)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=16), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=520), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=16), )] (%8041:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=16)], %8622:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=520)]) -> (%8625:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=16)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=44), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=522), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=44), )] (%8042:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=44)], %8624:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=522)]) -> (%8626:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=44)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, 
quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=16), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=16), )] (%8625:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=16)]) -> (%8627:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=16)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=44), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=44), )] (%8626:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=44)]) -> (%8628:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=44)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=515), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=16), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=523), )] (%8618:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=515)], %8627:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=16)]) -> (%8629:tensor<[1, 16, 
32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=523)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=523), inputs_1:QuantSpec(Raw(type: Float32), uuid=524), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=523), )] (%8629:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=523)], %8630:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=524), constant:[0.088388346]]) -> (%8631:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=523)]) + linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=523), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=525), )] (%8631:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=523)]) -> (%8632:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=525)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=525), inputs_1:QuantSpec(Raw(type: Int16), uuid=526), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=525), )] (%8632:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=525)], %8633:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=526), constant:[-20]]) -> (%8634:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=525)]) + linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=527), outputs_0:QuantSpec(Raw(type: UInt8), uuid=528), )] (%8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8635:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=527), constant:[-0.78515625]]) -> (%8636:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=528)]) + linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=528), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=523), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=525), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=525), )] (%8636:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=528)], %8631:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=523)], %8634:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=525)]) -> (%8637:tensor<[1, 16, 32, 1024], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=525)]) + linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=525), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=529), )] (%8637:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=525)]) -> (%8638:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=529)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=529), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=44), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=530), )] (%8638:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=529)], %8628:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=44)]) -> (%8639:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=530)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=530), outputs_0:QuantSpec(SymPerTensor(quant_min: 
-32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=530), )] (%8639:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=530)]) -> (%8640:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=530)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=530), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=530), )] (%8640:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=530)]) -> (%8640:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=530)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=530), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=532), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=531))] (%8640:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=530)]) -> (%8641:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=532)]) + cf.ReturnOp (%8641:tensor<[1, 32, 2048], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=532)], %8622:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=520)], %8624:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=522)]) -> () } } graph.SubGraphOp @model.layers.13.mlp [using_qnn:true, symbol:model.layers.13.mlp] { - (%948:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=532)]) -> (%953:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=540)]) { - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=532), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=535), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=534))] (%948:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=532)]) -> (%949:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=535)]) - linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=535), 
outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=536), )] (%949:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=535)]) -> (%950:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=536)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=532), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=538), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=537))] (%948:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=532)]) -> (%951:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=538)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=536), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=538), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=536), )] (%950:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=536)], %951:tensor<[1, 32, 6144], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=538)]) -> (%952:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=536)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=536), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=540), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=539))] (%952:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=536)]) -> (%953:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=540)]) - cf.ReturnOp (%953:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=540)]) -> () + (%8643:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=533)]) -> (%8648:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=541)]) { + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=533), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=536), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=535))] (%8643:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=533)]) -> (%8644:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=536)]) + linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=536), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=537), )] (%8644:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=536)]) -> (%8645:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=537)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=533), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=539), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=538))] (%8643:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=533)]) -> (%8646:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: 
Int16, scale_type: Float32), uuid=539)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=537), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=539), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=537), )] (%8645:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=537)], %8646:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=539)]) -> (%8647:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=537)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=537), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=541), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=540))] (%8647:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=537)]) -> (%8648:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=541)]) + cf.ReturnOp (%8648:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, 
scale_type: Float32), uuid=541)]) -> () } } graph.SubGraphOp @model.layers.14 [using_qnn:true, symbol:model.layers.14] { - (%954:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=540)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %348:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=17)], %349:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=45)]) -> (%995:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=574)], %968:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=553)], %970:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=555)]) { - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=540), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=541), )] (%954:tensor<[1, 32, 2048], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=540)]) -> (%955:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=541)]) - graph.CallGraphOp @model.layers.14.self_attn (%955:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=541)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %348:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=17)], %349:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=45)]) -> (%987:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=565)], %968:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=553)], %970:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=555)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, 
scale_type: Float32), uuid=565), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=540), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=565), )] (%987:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=565)], %954:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=540)]) -> (%988:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=565)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=565), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=566), )] (%988:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=565)]) -> (%989:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=566)]) - graph.CallGraphOp @model.layers.14.mlp (%989:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=566)]) -> (%994:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=574)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 
32767, quant_to_type: Int16, scale_type: Float32), uuid=574), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=565), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=574), )] (%994:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=574)], %988:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=565)]) -> (%995:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=574)]) - cf.ReturnOp (%995:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=574)], %968:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=553)], %970:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=555)]) -> () + (%8649:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=541)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, 
CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8043:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=17)], %8044:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=45)]) -> (%8690:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=575)], %8663:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=554)], %8665:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=556)]) { + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=541), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=542), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=543))] (%8649:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=541)]) -> (%8650:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=542)]) + graph.CallGraphOp @model.layers.14.self_attn (%8650:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=542)], %8074:tensor<[1, 32, 128], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8043:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=17)], %8044:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=45)]) -> (%8682:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=566)], %8663:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=554)], %8665:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=556)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=566), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=541), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=566), )] (%8682:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=566)], %8649:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=541)]) -> (%8683:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=566)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=566), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=567), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=568))] (%8683:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=566)]) -> (%8684:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=567)]) + graph.CallGraphOp @model.layers.14.mlp (%8684:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=567)]) -> (%8689:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=575)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=575), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=566), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=575), )] (%8689:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=575)], %8683:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=566)]) -> (%8690:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=575)]) + cf.ReturnOp (%8690:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=575)], %8663:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=554)], %8665:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=556)]) -> () } } graph.SubGraphOp @model.layers.14.self_attn [using_qnn:true, symbol:model.layers.14.self_attn] { - (%955:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=541)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %348:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=17)], %349:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: 
Int8, scale_type: Float32), uuid=45)]) -> (%987:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=565)], %968:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=553)], %970:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=555)]) { - linalg.CPU.LinearOp (%955:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=541)]) -> (%956:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=547)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=541), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=544), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=543))] (%955:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=541)]) -> (%957:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=544)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=541), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: 
Float32), uuid=546), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=545))] (%955:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=541)]) -> (%958:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=546)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=547), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=547), )] (%956:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=547)]) -> (%956:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=547)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=547), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=547), )] (%956:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=547)]) -> (%959:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=547)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=544), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=544), )] (%957:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=544)]) -> (%957:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: 
Float32), uuid=544)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=544), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=544), )] (%957:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=544)]) -> (%960:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=544)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=546), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=546), )] (%958:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=546)]) -> (%958:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=546)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=546), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=546), )] (%958:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=546)]) -> (%961:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, 
scale_type: Float32), uuid=546)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=547), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=548), )] (%959:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=547)]) -> (%962:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=548)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=544), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=550), )] (%960:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=544)]) -> (%963:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=550)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=548), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=548), )] (%962:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=548)], %379:tensor<[1, 32, 128], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%964:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=548)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=550), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=550), )] (%963:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=550)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%965:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=550)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=550), outputs_0:QuantSpec(Raw(type: Float16), uuid=552), 
)] (%965:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=550)]) -> (%966:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=552)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=552), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=553), )] (%966:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=552)]) -> (%967:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=553)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=553), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=553), )] (%967:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=553)]) -> (%968:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=553)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=546), outputs_0:QuantSpec(Raw(type: Float16), uuid=554), )] (%961:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=546)]) -> (%969:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=554)]) - linalg.CPU.CastTypeOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=554), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=555), )] (%969:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=554)]) -> (%970:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=555)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=17), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=553), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=17), )] (%348:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=17)], %968:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=553)]) -> (%971:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=17)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=45), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=555), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=45), )] (%349:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=45)], 
%970:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=555)]) -> (%972:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=45)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=17), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=17), )] (%971:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=17)]) -> (%973:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=17)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=45), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=45), )] (%972:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=45)]) -> (%974:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=45)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=548), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=17), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 
32767, quant_to_type: Int16, scale_type: Float32), uuid=556), )] (%964:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=548)], %973:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=17)]) -> (%975:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=556)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=556), inputs_1:QuantSpec(Raw(type: Float32), uuid=557), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=556), )] (%975:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=556)], %976:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=557), constant:[0.088388346]]) -> (%977:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=556)]) - linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=556), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=558), )] (%977:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=556)]) -> (%978:tensor<[1, 16, 32, 1], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=558)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=558), inputs_1:QuantSpec(Raw(type: Int16), uuid=559), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=558), )] (%978:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=558)], %979:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=559), constant:[-20]]) -> (%980:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=558)]) - linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=560), outputs_0:QuantSpec(Raw(type: UInt8), uuid=561), )] (%319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %981:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=560), constant:[-0.46289062]]) -> (%982:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=561)]) - linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=561), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=556), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=558), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=558), )] (%982:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: 
UInt8), uuid=561)], %977:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=556)], %980:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=558)]) -> (%983:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=558)]) - linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=558), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=562), )] (%983:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=558)]) -> (%984:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=562)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=562), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=45), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=563), )] (%984:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=562)], %974:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: 
Float32), uuid=45)]) -> (%985:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=563)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=563), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=563), )] (%985:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=563)]) -> (%986:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=563)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=563), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=563), )] (%986:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=563)]) -> (%986:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=563)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=563), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=565), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: 
Float32), uuid=564))] (%986:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=563)]) -> (%987:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=565)]) - cf.ReturnOp (%987:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=565)], %968:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=553)], %970:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=555)]) -> () + (%8650:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=542)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8043:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=17)], %8044:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=45)]) -> (%8682:tensor<[1, 32, 2048], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=566)], %8663:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=554)], %8665:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=556)]) { + linalg.CPU.LinearOp (%8650:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=542)]) -> (%8651:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=548)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=542), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=545), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=544))] (%8650:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=542)]) -> (%8652:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=545)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=542), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=547), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, 
block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=546))] (%8650:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=542)]) -> (%8653:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=547)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=548), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=548), )] (%8651:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=548)]) -> (%8651:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=548)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=548), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=548), )] (%8651:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=548)]) -> (%8654:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=548)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=545), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=545), )] (%8652:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=545)]) -> (%8652:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=545)]) + linalg.CPU.TransposeOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=545), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=545), )] (%8652:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=545)]) -> (%8655:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=545)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=547), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=547), )] (%8653:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=547)]) -> (%8653:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=547)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=547), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=547), )] (%8653:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=547)]) -> (%8656:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=547)]) + 
linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=548), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=549), weight_weight:QuantSpec(Raw(type: Int16PerTensor), uuid=550))] (%8654:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=548)]) -> (%8657:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=549)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=545), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=551), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=552))] (%8655:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=545)]) -> (%8658:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=551)]) + linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=549), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=549), )] (%8657:tensor<[1, 16, 32, 128], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=549)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%8659:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=549)]) + linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=551), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=551), )] (%8658:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=551)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%8660:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=551)]) + linalg.CPU.CastTypeOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=551), outputs_0:QuantSpec(Raw(type: Float16), uuid=553), )] (%8660:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=551)]) -> (%8661:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=553)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=553), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=554), )] (%8661:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=553)]) -> (%8662:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=554)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=554), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=554), )] (%8662:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=554)]) -> (%8663:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=554)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=547), outputs_0:QuantSpec(Raw(type: Float16), uuid=555), )] (%8656:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 
32767, quant_to_type: Int16, scale_type: Float32), uuid=547)]) -> (%8664:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=555)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=555), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=556), )] (%8664:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=555)]) -> (%8665:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=556)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=17), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=554), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=17), )] (%8043:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=17)], %8663:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=554)]) -> (%8666:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=17)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=45), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=556), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: 
Float32), uuid=45), )] (%8044:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=45)], %8665:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=556)]) -> (%8667:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=45)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=17), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=17), )] (%8666:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=17)]) -> (%8668:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=17)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=45), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=45), )] (%8667:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=45)]) -> (%8669:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=45)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, 
scale_type: Float32), uuid=549), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=17), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=557), )] (%8659:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=549)], %8668:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=17)]) -> (%8670:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=557)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=557), inputs_1:QuantSpec(Raw(type: Float32), uuid=558), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=557), )] (%8670:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=557)], %8671:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=558), constant:[0.088388346]]) -> (%8672:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=557)]) + linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=557), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=559), )] (%8672:tensor<[1, 16, 32, 1024], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=557)]) -> (%8673:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=559)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=559), inputs_1:QuantSpec(Raw(type: Int16), uuid=560), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=559), )] (%8673:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=559)], %8674:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=560), constant:[-20]]) -> (%8675:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=559)]) + linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=561), outputs_0:QuantSpec(Raw(type: UInt8), uuid=562), )] (%8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8676:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=561), constant:[-0.46289062]]) -> (%8677:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=562)]) + linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=562), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=557), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=559), 
outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=559), )] (%8677:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=562)], %8672:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=557)], %8675:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=559)]) -> (%8678:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=559)]) + linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=559), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=563), )] (%8678:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=559)]) -> (%8679:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=563)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=563), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=45), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=564), )] (%8679:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=563)], %8669:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=45)]) -> (%8680:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=564)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=564), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=564), )] (%8680:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=564)]) -> (%8681:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=564)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=564), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=564), )] (%8681:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=564)]) -> (%8681:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=564)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=564), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 
32767, quant_to_type: Int16, scale_type: Float32), uuid=566), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=565))] (%8681:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=564)]) -> (%8682:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=566)]) + cf.ReturnOp (%8682:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=566)], %8663:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=554)], %8665:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=556)]) -> () } } graph.SubGraphOp @model.layers.14.mlp [using_qnn:true, symbol:model.layers.14.mlp] { - (%989:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=566)]) -> (%994:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=574)]) { - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=566), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=569), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, 
scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=568))] (%989:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=566)]) -> (%990:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=569)]) - linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=569), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=570), )] (%990:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=569)]) -> (%991:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=570)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=566), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=572), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=571))] (%989:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=566)]) -> (%992:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=572)]) - linalg.CPU.MulOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=570), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=572), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=570), )] (%991:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=570)], %992:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=572)]) -> (%993:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=570)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=570), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=574), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=573))] (%993:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=570)]) -> (%994:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=574)]) - cf.ReturnOp (%994:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=574)]) -> () + (%8684:tensor<[1, 32, 
2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=567)]) -> (%8689:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=575)]) { + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=567), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=570), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=569))] (%8684:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=567)]) -> (%8685:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=570)]) + linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=570), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=571), )] (%8685:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=570)]) -> (%8686:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=571)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: 
Float32), uuid=567), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=573), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=572))] (%8684:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=567)]) -> (%8687:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=573)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=571), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=573), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=571), )] (%8686:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=571)], %8687:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=573)]) -> (%8688:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=571)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=571), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=575), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 
7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=574))] (%8688:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=571)]) -> (%8689:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=575)]) + cf.ReturnOp (%8689:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=575)]) -> () } } graph.SubGraphOp @model.layers.15 [using_qnn:true, symbol:model.layers.15] { - (%995:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=574)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %350:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=18)], %351:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=46)]) -> (%1036:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=608)], %1009:tensor<[1, 8, 128, 32], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=587)], %1011:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=589)]) { - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=574), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=575), )] (%995:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=574)]) -> (%996:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=575)]) - graph.CallGraphOp @model.layers.15.self_attn (%996:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=575)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %350:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=18)], %351:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=46)]) -> 
(%1028:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=599)], %1009:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=587)], %1011:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=589)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=599), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=574), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=599), )] (%1028:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=599)], %995:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=574)]) -> (%1029:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=599)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=599), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=600), )] (%1029:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=599)]) -> (%1030:tensor<[1, 32, 
2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=600)]) - graph.CallGraphOp @model.layers.15.mlp (%1030:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=600)]) -> (%1035:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=608)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=608), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=599), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=608), )] (%1035:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=608)], %1029:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=599)]) -> (%1036:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=608)]) - cf.ReturnOp (%1036:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=608)], %1009:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=587)], %1011:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, 
quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=589)]) -> () + (%8690:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=575)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8045:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=18)], %8046:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=46)]) -> (%8731:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=609)], %8704:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=588)], %8706:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=590)]) { + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=575), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=576), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: 
Float32), uuid=577))] (%8690:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=575)]) -> (%8691:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=576)]) + graph.CallGraphOp @model.layers.15.self_attn (%8691:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=576)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8045:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=18)], %8046:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=46)]) -> (%8723:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=600)], %8704:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=588)], %8706:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=590)]) + linalg.CPU.AddOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=600), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=575), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=600), )] (%8723:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=600)], %8690:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=575)]) -> (%8724:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=600)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=600), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=601), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=602))] (%8724:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=600)]) -> (%8725:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=601)]) + graph.CallGraphOp @model.layers.15.mlp (%8725:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=601)]) -> (%8730:tensor<[1, 32, 2048], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=609)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=609), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=600), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=609), )] (%8730:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=609)], %8724:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=600)]) -> (%8731:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=609)]) + cf.ReturnOp (%8731:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=609)], %8704:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=588)], %8706:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=590)]) -> () } } graph.SubGraphOp @model.layers.15.self_attn [using_qnn:true, symbol:model.layers.15.self_attn] { - (%996:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=575)], %379:tensor<[1, 32, 128], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %350:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=18)], %351:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=46)]) -> (%1028:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=599)], %1009:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=587)], %1011:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=589)]) { - linalg.CPU.LinearOp (%996:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=575)]) -> (%997:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=581)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=575), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=578), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, 
scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=577))] (%996:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=575)]) -> (%998:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=578)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=575), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=580), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=579))] (%996:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=575)]) -> (%999:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=580)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=581), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=581), )] (%997:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=581)]) -> (%997:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=581)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=581), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=581), )] (%997:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=581)]) -> (%1000:tensor<[1, 16, 32, 128], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=581)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=578), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=578), )] (%998:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=578)]) -> (%998:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=578)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=578), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=578), )] (%998:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=578)]) -> (%1001:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=578)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=580), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=580), )] (%999:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=580)]) -> (%999:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 
-32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=580)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=580), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=580), )] (%999:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=580)]) -> (%1002:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=580)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=581), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=582), )] (%1000:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=581)]) -> (%1003:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=582)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=578), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=584), )] (%1001:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=578)]) -> (%1004:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=584)]) - linalg.CPU.RoPEOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=582), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=582), )] (%1003:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=582)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%1005:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=582)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=584), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=584), )] (%1004:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=584)], %379:tensor<[1, 32, 128], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%1006:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=584)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=584), outputs_0:QuantSpec(Raw(type: Float16), uuid=586), )] (%1006:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=584)]) -> (%1007:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=586)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=586), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=587), )] (%1007:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=586)]) -> (%1008:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=587)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=587), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=587), )] (%1008:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), 
uuid=587)]) -> (%1009:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=587)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=580), outputs_0:QuantSpec(Raw(type: Float16), uuid=588), )] (%1002:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=580)]) -> (%1010:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=588)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=588), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=589), )] (%1010:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=588)]) -> (%1011:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=589)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=18), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=587), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=18), )] (%350:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=18)], %1009:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=587)]) -> (%1012:tensor<[1, 8, 128, 
1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=18)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=46), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=589), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=46), )] (%351:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=46)], %1011:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=589)]) -> (%1013:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=46)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=18), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=18), )] (%1012:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=18)]) -> (%1014:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=18)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=46), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, 
scale_type: Float32), uuid=46), )] (%1013:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=46)]) -> (%1015:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=46)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=582), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=18), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=590), )] (%1005:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=582)], %1014:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=18)]) -> (%1016:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=590)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=590), inputs_1:QuantSpec(Raw(type: Float32), uuid=591), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=590), )] (%1016:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=590)], %1017:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=591), 
constant:[0.088388346]]) -> (%1018:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=590)]) - linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=590), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=592), )] (%1018:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=590)]) -> (%1019:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=592)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=592), inputs_1:QuantSpec(Raw(type: Int16), uuid=593), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=592), )] (%1019:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=592)], %1020:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=593), constant:[-20]]) -> (%1021:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=592)]) - linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=594), outputs_0:QuantSpec(Raw(type: UInt8), uuid=595), )] (%319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], 
%1022:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=594), constant:[0.953125]]) -> (%1023:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=595)]) - linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=595), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=590), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=592), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=592), )] (%1023:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=595)], %1018:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=590)], %1021:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=592)]) -> (%1024:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=592)]) - linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=592), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=596), )] (%1024:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=592)]) -> (%1025:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=596)]) 
- linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=596), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=46), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=597), )] (%1025:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=596)], %1015:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=46)]) -> (%1026:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=597)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=597), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=597), )] (%1026:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=597)]) -> (%1027:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=597)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=597), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=597), )] (%1027:tensor<[1, 32, 16, 128], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=597)]) -> (%1027:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=597)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=597), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=599), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=598))] (%1027:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=597)]) -> (%1028:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=599)]) - cf.ReturnOp (%1028:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=599)], %1009:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=587)], %1011:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=589)]) -> () + (%8691:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=576)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8045:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=18)], %8046:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=46)]) -> (%8723:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=600)], %8704:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=588)], %8706:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=590)]) { + linalg.CPU.LinearOp (%8691:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=576)]) -> (%8692:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=582)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=576), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=579), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=578))] 
(%8691:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=576)]) -> (%8693:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=579)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=576), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=581), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=580))] (%8691:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=576)]) -> (%8694:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=581)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=582), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=582), )] (%8692:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=582)]) -> (%8692:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=582)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=582), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=582), )] (%8692:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=582)]) -> (%8695:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=582)]) 
+ linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=579), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=579), )] (%8693:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=579)]) -> (%8693:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=579)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=579), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=579), )] (%8693:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=579)]) -> (%8696:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=579)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=581), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=581), )] (%8694:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=581)]) -> (%8694:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=581)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=581), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=581), )] (%8694:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=581)]) -> (%8697:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=581)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=582), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=583), weight_weight:QuantSpec(Raw(type: Int16PerTensor), uuid=584))] (%8695:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=582)]) -> (%8698:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=583)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=579), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=585), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=586))] (%8696:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=579)]) -> (%8699:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 
-32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=585)]) + linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=583), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=583), )] (%8698:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=583)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%8700:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=583)]) + linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=585), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=585), )] (%8699:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 
-32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=585)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%8701:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=585)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=585), outputs_0:QuantSpec(Raw(type: Float16), uuid=587), )] (%8701:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=585)]) -> (%8702:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=587)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=587), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=588), )] (%8702:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=587)]) -> (%8703:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=588)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=588), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=588), )] (%8703:tensor<[1, 8, 32, 128], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=588)]) -> (%8704:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=588)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=581), outputs_0:QuantSpec(Raw(type: Float16), uuid=589), )] (%8697:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=581)]) -> (%8705:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=589)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=589), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=590), )] (%8705:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=589)]) -> (%8706:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=590)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=18), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=588), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=18), )] (%8045:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=18)], %8704:tensor<[1, 8, 128, 32], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=588)]) -> (%8707:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=18)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=46), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=590), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=46), )] (%8046:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=46)], %8706:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=590)]) -> (%8708:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=46)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=18), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=18), )] (%8707:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=18)]) -> (%8709:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=18)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, 
quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=46), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=46), )] (%8708:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=46)]) -> (%8710:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=46)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=583), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=18), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=591), )] (%8700:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=583)], %8709:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=18)]) -> (%8711:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=591)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=591), inputs_1:QuantSpec(Raw(type: Float32), uuid=592), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=591), )] (%8711:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=591)], %8712:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=592), constant:[0.088388346]]) -> (%8713:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=591)]) + linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=591), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=593), )] (%8713:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=591)]) -> (%8714:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=593)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=593), inputs_1:QuantSpec(Raw(type: Int16), uuid=594), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=593), )] (%8714:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=593)], %8715:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=594), constant:[-20]]) -> (%8716:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=593)]) + linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=595), 
outputs_0:QuantSpec(Raw(type: UInt8), uuid=596), )] (%8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8717:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=595), constant:[0.953125]]) -> (%8718:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=596)]) + linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=596), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=591), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=593), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=593), )] (%8718:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=596)], %8713:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=591)], %8716:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=593)]) -> (%8719:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=593)]) + linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=593), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=597), )] (%8719:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=593)]) -> (%8720:tensor<[1, 16, 32, 1024], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=597)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=597), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=46), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=598), )] (%8720:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=597)], %8710:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=46)]) -> (%8721:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=598)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=598), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=598), )] (%8721:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=598)]) -> (%8722:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=598)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=598), 
outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=598), )] (%8722:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=598)]) -> (%8722:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=598)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=598), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=600), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=599))] (%8722:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=598)]) -> (%8723:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=600)]) + cf.ReturnOp (%8723:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=600)], %8704:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=588)], %8706:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=590)]) -> () } } graph.SubGraphOp @model.layers.15.mlp [using_qnn:true, symbol:model.layers.15.mlp] { - (%1030:tensor<[1, 32, 2048], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=600)]) -> (%1035:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=608)]) { - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=600), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=603), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=602))] (%1030:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=600)]) -> (%1031:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=603)]) - linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=603), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=604), )] (%1031:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=603)]) -> (%1032:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=604)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=600), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=606), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=605))] (%1030:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=600)]) -> (%1033:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=606)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=604), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=606), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=604), )] (%1032:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=604)], %1033:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=606)]) -> (%1034:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=604)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=604), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=608), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, 
block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=607))] (%1034:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=604)]) -> (%1035:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=608)]) - cf.ReturnOp (%1035:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=608)]) -> () + (%8725:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=601)]) -> (%8730:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=609)]) { + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=601), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=604), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=603))] (%8725:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=601)]) -> (%8726:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=604)]) + linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=604), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=605), )] (%8726:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=604)]) -> (%8727:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=605)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=601), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=607), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=606))] (%8725:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=601)]) -> (%8728:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=607)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=605), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=607), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=605), )] (%8727:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=605)], %8728:tensor<[1, 
32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=607)]) -> (%8729:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=605)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=605), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=609), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=608))] (%8729:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=605)]) -> (%8730:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=609)]) + cf.ReturnOp (%8730:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=609)]) -> () } } graph.SubGraphOp @model.layers.16 [using_qnn:true, symbol:model.layers.16] { - (%1036:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=608)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], 
%319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %352:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=19)], %353:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=47)]) -> (%1077:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=642)], %1050:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=621)], %1052:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=623)]) { - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=608), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=609), )] (%1036:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=608)]) -> (%1037:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=609)]) - graph.CallGraphOp @model.layers.16.self_attn (%1037:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=609)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: 
Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %352:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=19)], %353:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=47)]) -> (%1069:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=633)], %1050:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=621)], %1052:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=623)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=633), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=608), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=633), )] (%1069:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=633)], %1036:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=608)]) -> (%1070:tensor<[1, 32, 2048], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=633)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=633), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=634), )] (%1070:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=633)]) -> (%1071:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=634)]) - graph.CallGraphOp @model.layers.16.mlp (%1071:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=634)]) -> (%1076:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=642)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=642), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=633), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=642), )] (%1076:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=642)], %1070:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=633)]) 
-> (%1077:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=642)]) - cf.ReturnOp (%1077:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=642)], %1050:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=621)], %1052:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=623)]) -> () + (%8731:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=609)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8047:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=19)], %8048:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=47)]) -> (%8772:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=643)], %8745:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, 
quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=622)], %8747:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=624)]) { + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=609), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=610), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=611))] (%8731:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=609)]) -> (%8732:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=610)]) + graph.CallGraphOp @model.layers.16.self_attn (%8732:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=610)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8047:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=19)], %8048:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, 
quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=47)]) -> (%8764:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=634)], %8745:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=622)], %8747:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=624)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=634), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=609), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=634), )] (%8764:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=634)], %8731:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=609)]) -> (%8765:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=634)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=634), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=635), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=636))] 
(%8765:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=634)]) -> (%8766:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=635)]) + graph.CallGraphOp @model.layers.16.mlp (%8766:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=635)]) -> (%8771:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=643)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=643), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=634), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=643), )] (%8771:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=643)], %8765:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=634)]) -> (%8772:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=643)]) + cf.ReturnOp (%8772:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=643)], %8745:tensor<[1, 8, 128, 32], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=622)], %8747:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=624)]) -> () } } graph.SubGraphOp @model.layers.16.self_attn [using_qnn:true, symbol:model.layers.16.self_attn] { - (%1037:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=609)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %352:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=19)], %353:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=47)]) -> (%1069:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=633)], %1050:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=621)], %1052:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=623)]) { - linalg.CPU.LinearOp (%1037:tensor<[1, 32, 2048], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=609)]) -> (%1038:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=615)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=609), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=612), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=611))] (%1037:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=609)]) -> (%1039:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=612)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=609), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=614), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=613))] (%1037:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=609)]) -> (%1040:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=614)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: 
Int16PerTensor), uuid=615), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=615), )] (%1038:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=615)]) -> (%1038:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=615)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=615), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=615), )] (%1038:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=615)]) -> (%1041:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=615)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=612), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=612), )] (%1039:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=612)]) -> (%1039:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=612)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=612), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=612), )] (%1039:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=612)]) -> (%1042:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 
32767, quant_to_type: Int16, scale_type: Float32), uuid=612)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=614), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=614), )] (%1040:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=614)]) -> (%1040:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=614)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=614), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=614), )] (%1040:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=614)]) -> (%1043:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=614)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=615), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=616), )] (%1041:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=615)]) -> (%1044:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=616)]) - linalg.CPU.RMSNormOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=612), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=618), )] (%1042:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=612)]) -> (%1045:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=618)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=616), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=616), )] (%1044:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=616)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%1046:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=616)]) - linalg.CPU.RoPEOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=618), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=618), )] (%1045:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=618)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%1047:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=618)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=618), outputs_0:QuantSpec(Raw(type: Float16), uuid=620), )] (%1047:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=618)]) -> (%1048:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=620)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=620), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=621), )] 
(%1048:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=620)]) -> (%1049:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=621)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=621), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=621), )] (%1049:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=621)]) -> (%1050:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=621)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=614), outputs_0:QuantSpec(Raw(type: Float16), uuid=622), )] (%1043:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=614)]) -> (%1051:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=622)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=622), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=623), )] (%1051:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=622)]) -> (%1052:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=623)]) - linalg.CPU.ConcatOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=19), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=621), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=19), )] (%352:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=19)], %1050:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=621)]) -> (%1053:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=19)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=47), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=623), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=47), )] (%353:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=47)], %1052:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=623)]) -> (%1054:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=47)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, 
scale_type: Float32), uuid=19), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=19), )] (%1053:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=19)]) -> (%1055:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=19)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=47), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=47), )] (%1054:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=47)]) -> (%1056:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=47)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=616), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=19), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=624), )] (%1046:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=616)], %1055:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=19)]) -> (%1057:tensor<[1, 16, 32, 1024], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=624)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=624), inputs_1:QuantSpec(Raw(type: Float32), uuid=625), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=624), )] (%1057:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=624)], %1058:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=625), constant:[0.088388346]]) -> (%1059:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=624)]) - linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=624), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=626), )] (%1059:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=624)]) -> (%1060:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=626)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=626), inputs_1:QuantSpec(Raw(type: Int16), uuid=627), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=626), )] 
(%1060:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=626)], %1061:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=627), constant:[-20]]) -> (%1062:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=626)]) - linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=628), outputs_0:QuantSpec(Raw(type: UInt8), uuid=629), )] (%319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %1063:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=628), constant:[0.118652344]]) -> (%1064:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=629)]) - linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=629), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=624), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=626), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=626), )] (%1064:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=629)], %1059:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=624)], %1062:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=626)]) -> (%1065:tensor<[1, 16, 32, 1024], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=626)]) - linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=626), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=630), )] (%1065:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=626)]) -> (%1066:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=630)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=630), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=47), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=631), )] (%1066:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=630)], %1056:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=47)]) -> (%1067:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=631)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=631), outputs_0:QuantSpec(SymPerTensor(quant_min: 
-32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=631), )] (%1067:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=631)]) -> (%1068:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=631)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=631), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=631), )] (%1068:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=631)]) -> (%1068:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=631)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=631), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=633), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=632))] (%1068:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=631)]) -> (%1069:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=633)]) - cf.ReturnOp (%1069:tensor<[1, 32, 2048], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=633)], %1050:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=621)], %1052:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=623)]) -> () + (%8732:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=610)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8047:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=19)], %8048:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=47)]) -> (%8764:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=634)], %8745:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=622)], %8747:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), 
uuid=624)]) { + linalg.CPU.LinearOp (%8732:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=610)]) -> (%8733:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=616)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=610), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=613), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=612))] (%8732:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=610)]) -> (%8734:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=613)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=610), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=615), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=614))] (%8732:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=610)]) -> (%8735:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=615)]) + 
linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=616), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=616), )] (%8733:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=616)]) -> (%8733:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=616)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=616), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=616), )] (%8733:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=616)]) -> (%8736:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=616)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=613), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=613), )] (%8734:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=613)]) -> (%8734:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=613)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=613), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=613), )] (%8734:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=613)]) -> (%8737:tensor<[1, 8, 32, 128], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=613)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=615), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=615), )] (%8735:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=615)]) -> (%8735:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=615)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=615), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=615), )] (%8735:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=615)]) -> (%8738:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=615)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=616), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=617), weight_weight:QuantSpec(Raw(type: Int16PerTensor), uuid=618))] (%8736:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=616)]) -> (%8739:tensor<[1, 16, 32, 128], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=617)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=613), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=619), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=620))] (%8737:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=613)]) -> (%8740:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=619)]) + linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=617), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=617), )] (%8739:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=617)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: 
Float32), uuid=65)]) -> (%8741:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=617)]) + linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=619), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=619), )] (%8740:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=619)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%8742:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=619)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=619), outputs_0:QuantSpec(Raw(type: Float16), uuid=621), )] (%8742:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=619)]) -> (%8743:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=621)]) + 
linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=621), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=622), )] (%8743:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=621)]) -> (%8744:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=622)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=622), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=622), )] (%8744:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=622)]) -> (%8745:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=622)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=615), outputs_0:QuantSpec(Raw(type: Float16), uuid=623), )] (%8738:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=615)]) -> (%8746:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=623)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=623), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=624), )] (%8746:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=623)]) -> 
(%8747:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=624)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=19), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=622), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=19), )] (%8047:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=19)], %8745:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=622)]) -> (%8748:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=19)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=47), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=624), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=47), )] (%8048:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=47)], %8747:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=624)]) -> (%8749:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, 
quant_to_type: Int8, scale_type: Float32), uuid=47)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=19), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=19), )] (%8748:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=19)]) -> (%8750:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=19)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=47), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=47), )] (%8749:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=47)]) -> (%8751:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=47)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=617), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=19), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=625), )] (%8741:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=617)], %8750:tensor<[1, 16, 128, 1024], 
Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=19)]) -> (%8752:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=625)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=625), inputs_1:QuantSpec(Raw(type: Float32), uuid=626), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=625), )] (%8752:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=625)], %8753:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=626), constant:[0.088388346]]) -> (%8754:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=625)]) + linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=625), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=627), )] (%8754:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=625)]) -> (%8755:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=627)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=627), inputs_1:QuantSpec(Raw(type: Int16), uuid=628), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=627), )] (%8755:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=627)], %8756:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=628), constant:[-20]]) -> (%8757:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=627)]) + linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=629), outputs_0:QuantSpec(Raw(type: UInt8), uuid=630), )] (%8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8758:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=629), constant:[0.118652344]]) -> (%8759:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=630)]) + linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=630), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=625), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=627), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=627), )] (%8759:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=630)], %8754:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=625)], %8757:tensor<[1, 16, 32, 1], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=627)]) -> (%8760:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=627)]) + linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=627), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=631), )] (%8760:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=627)]) -> (%8761:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=631)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=631), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=47), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=632), )] (%8761:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=631)], %8751:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=47)]) -> (%8762:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=632)]) + linalg.CPU.TransposeOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=632), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=632), )] (%8762:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=632)]) -> (%8763:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=632)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=632), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=632), )] (%8763:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=632)]) -> (%8763:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=632)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=632), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=634), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=633))] (%8763:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=632)]) -> (%8764:tensor<[1, 32, 2048], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=634)]) + cf.ReturnOp (%8764:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=634)], %8745:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=622)], %8747:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=624)]) -> () } } graph.SubGraphOp @model.layers.16.mlp [using_qnn:true, symbol:model.layers.16.mlp] { - (%1071:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=634)]) -> (%1076:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=642)]) { - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=634), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=637), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=636))] (%1071:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=634)]) -> (%1072:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=637)]) - 
linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=637), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=638), )] (%1072:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=637)]) -> (%1073:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=638)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=634), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=640), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=639))] (%1071:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=634)]) -> (%1074:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=640)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=638), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=640), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=638), )] (%1073:tensor<[1, 32, 6144], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=638)], %1074:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=640)]) -> (%1075:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=638)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=638), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=642), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=641))] (%1075:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=638)]) -> (%1076:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=642)]) - cf.ReturnOp (%1076:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=642)]) -> () + (%8766:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=635)]) -> (%8771:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=643)]) { + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=635), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=638), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=637))] (%8766:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=635)]) -> (%8767:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=638)]) + linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=638), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=639), )] (%8767:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=638)]) -> (%8768:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=639)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=635), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=641), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=640))] (%8766:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=635)]) -> (%8769:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=641)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=639), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=641), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=639), )] (%8768:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=639)], %8769:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=641)]) -> (%8770:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=639)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=639), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=643), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=642))] (%8770:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=639)]) -> (%8771:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: 
Int16, scale_type: Float32), uuid=643)]) + cf.ReturnOp (%8771:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=643)]) -> () } } graph.SubGraphOp @model.layers.17 [using_qnn:true, symbol:model.layers.17] { - (%1077:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=642)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %354:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=20)], %355:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=48)]) -> (%1118:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=676)], %1091:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=655)], %1093:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=657)]) { - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=642), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=643), )] (%1077:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=642)]) -> (%1078:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=643)]) - graph.CallGraphOp @model.layers.17.self_attn (%1078:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=643)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %354:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=20)], %355:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=48)]) -> (%1110:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=667)], %1091:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=655)], %1093:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, 
quant_to_type: Int8, scale_type: Float32), uuid=657)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=667), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=642), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=667), )] (%1110:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=667)], %1077:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=642)]) -> (%1111:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=667)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=667), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=668), )] (%1111:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=667)]) -> (%1112:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=668)]) - graph.CallGraphOp @model.layers.17.mlp (%1112:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=668)]) -> (%1117:tensor<[1, 32, 2048], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=676)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=676), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=667), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=676), )] (%1117:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=676)], %1111:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=667)]) -> (%1118:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=676)]) - cf.ReturnOp (%1118:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=676)], %1091:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=655)], %1093:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=657)]) -> () + (%8772:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=643)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8049:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=20)], %8050:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=48)]) -> (%8813:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=677)], %8786:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=656)], %8788:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=658)]) { + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=643), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=644), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=645))] (%8772:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=643)]) -> (%8773:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=644)]) + graph.CallGraphOp 
@model.layers.17.self_attn (%8773:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=644)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8049:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=20)], %8050:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=48)]) -> (%8805:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=668)], %8786:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=656)], %8788:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=658)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=668), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=643), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=668), )] (%8805:tensor<[1, 32, 2048], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=668)], %8772:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=643)]) -> (%8806:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=668)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=668), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=669), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=670))] (%8806:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=668)]) -> (%8807:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=669)]) + graph.CallGraphOp @model.layers.17.mlp (%8807:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=669)]) -> (%8812:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=677)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=677), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=668), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=677), )] (%8812:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=677)], %8806:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=668)]) -> (%8813:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=677)]) + cf.ReturnOp (%8813:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=677)], %8786:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=656)], %8788:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=658)]) -> () } } graph.SubGraphOp @model.layers.17.self_attn [using_qnn:true, symbol:model.layers.17.self_attn] { - (%1078:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=643)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %354:tensor<[1, 8, 128, 992], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=20)], %355:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=48)]) -> (%1110:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=667)], %1091:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=655)], %1093:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=657)]) { - linalg.CPU.LinearOp (%1078:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=643)]) -> (%1079:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=649)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=643), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=646), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=645))] (%1078:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=643)]) -> (%1080:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=646)]) - 
linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=643), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=648), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=647))] (%1078:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=643)]) -> (%1081:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=648)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=649), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=649), )] (%1079:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=649)]) -> (%1079:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=649)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=649), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=649), )] (%1079:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=649)]) -> (%1082:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=649)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=646), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=646), )] (%1080:tensor<[1, 32, 1024], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=646)]) -> (%1080:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=646)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=646), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=646), )] (%1080:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=646)]) -> (%1083:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=646)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=648), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=648), )] (%1081:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=648)]) -> (%1081:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=648)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=648), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=648), )] (%1081:tensor<[1, 32, 8, 128], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=648)]) -> (%1084:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=648)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=649), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=650), )] (%1082:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=649)]) -> (%1085:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=650)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=646), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=652), )] (%1083:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=646)]) -> (%1086:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=652)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=650), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), 
outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=650), )] (%1085:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=650)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%1087:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=650)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=652), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=652), )] (%1086:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=652)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%1088:tensor<[1, 8, 32, 128], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=652)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=652), outputs_0:QuantSpec(Raw(type: Float16), uuid=654), )] (%1088:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=652)]) -> (%1089:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=654)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=654), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=655), )] (%1089:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=654)]) -> (%1090:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=655)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=655), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=655), )] (%1090:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=655)]) -> (%1091:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=655)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=648), 
outputs_0:QuantSpec(Raw(type: Float16), uuid=656), )] (%1084:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=648)]) -> (%1092:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=656)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=656), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=657), )] (%1092:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=656)]) -> (%1093:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=657)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=20), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=655), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=20), )] (%354:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=20)], %1091:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=655)]) -> (%1094:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=20)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=48), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, 
quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=657), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=48), )] (%355:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=48)], %1093:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=657)]) -> (%1095:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=48)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=20), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=20), )] (%1094:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=20)]) -> (%1096:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=20)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=48), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=48), )] (%1095:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=48)]) -> (%1097:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: 
Float32), uuid=48)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=650), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=20), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=658), )] (%1087:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=650)], %1096:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=20)]) -> (%1098:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=658)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=658), inputs_1:QuantSpec(Raw(type: Float32), uuid=659), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=658), )] (%1098:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=658)], %1099:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=659), constant:[0.088388346]]) -> (%1100:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=658)]) - linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=658), 
outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=660), )] (%1100:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=658)]) -> (%1101:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=660)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=660), inputs_1:QuantSpec(Raw(type: Int16), uuid=661), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=660), )] (%1101:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=660)], %1102:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=661), constant:[-20]]) -> (%1103:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=660)]) - linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=662), outputs_0:QuantSpec(Raw(type: UInt8), uuid=663), )] (%319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %1104:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=662), constant:[-0.99609375]]) -> (%1105:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=663)]) - linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=663), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, 
scale_type: Float32), uuid=658), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=660), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=660), )] (%1105:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=663)], %1100:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=658)], %1103:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=660)]) -> (%1106:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=660)]) - linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=660), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=664), )] (%1106:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=660)]) -> (%1107:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=664)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=664), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=48), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, 
scale_type: Float32), uuid=665), )] (%1107:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=664)], %1097:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=48)]) -> (%1108:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=665)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=665), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=665), )] (%1108:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=665)]) -> (%1109:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=665)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=665), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=665), )] (%1109:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=665)]) -> (%1109:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=665)]) - linalg.CPU.LinearOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=665), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=667), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=666))] (%1109:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=665)]) -> (%1110:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=667)]) - cf.ReturnOp (%1110:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=667)], %1091:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=655)], %1093:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=657)]) -> () + (%8773:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=644)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], 
%8049:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=20)], %8050:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=48)]) -> (%8805:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=668)], %8786:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=656)], %8788:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=658)]) { + linalg.CPU.LinearOp (%8773:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=644)]) -> (%8774:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=650)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=644), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=647), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=646))] (%8773:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=644)]) -> (%8775:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, 
scale_type: Float32), uuid=647)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=644), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=649), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=648))] (%8773:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=644)]) -> (%8776:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=649)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=650), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=650), )] (%8774:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=650)]) -> (%8774:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=650)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=650), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=650), )] (%8774:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=650)]) -> (%8777:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=650)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=647), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=647), )] (%8775:tensor<[1, 32, 
1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=647)]) -> (%8775:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=647)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=647), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=647), )] (%8775:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=647)]) -> (%8778:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=647)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=649), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=649), )] (%8776:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=649)]) -> (%8776:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=649)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=649), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=649), )] 
(%8776:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=649)]) -> (%8779:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=649)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=650), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=651), weight_weight:QuantSpec(Raw(type: Int16PerTensor), uuid=652))] (%8777:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=650)]) -> (%8780:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=651)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=647), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=653), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=654))] (%8778:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=647)]) -> (%8781:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=653)]) + linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=651), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 
32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=651), )] (%8780:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=651)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%8782:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=651)]) + linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=653), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=653), )] (%8781:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=653)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%8783:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=653)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=653), outputs_0:QuantSpec(Raw(type: Float16), uuid=655), )] (%8783:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=653)]) -> (%8784:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=655)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=655), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=656), )] (%8784:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=655)]) -> (%8785:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=656)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=656), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=656), )] (%8785:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=656)]) -> (%8786:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=656)]) + 
linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=649), outputs_0:QuantSpec(Raw(type: Float16), uuid=657), )] (%8779:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=649)]) -> (%8787:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=657)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=657), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=658), )] (%8787:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=657)]) -> (%8788:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=658)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=20), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=656), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=20), )] (%8049:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=20)], %8786:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=656)]) -> (%8789:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=20)]) + linalg.CPU.ConcatOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=48), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=658), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=48), )] (%8050:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=48)], %8788:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=658)]) -> (%8790:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=48)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=20), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=20), )] (%8789:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=20)]) -> (%8791:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=20)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=48), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=48), )] (%8790:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, 
scale_type: Float32), uuid=48)]) -> (%8792:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=48)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=651), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=20), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=659), )] (%8782:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=651)], %8791:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=20)]) -> (%8793:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=659)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=659), inputs_1:QuantSpec(Raw(type: Float32), uuid=660), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=659), )] (%8793:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=659)], %8794:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=660), constant:[0.088388346]]) -> (%8795:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=659)]) + linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=659), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=661), )] (%8795:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=659)]) -> (%8796:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=661)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=661), inputs_1:QuantSpec(Raw(type: Int16), uuid=662), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=661), )] (%8796:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=661)], %8797:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=662), constant:[-20]]) -> (%8798:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=661)]) + linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=663), outputs_0:QuantSpec(Raw(type: UInt8), uuid=664), )] (%8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8799:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=663), constant:[-0.99609375]]) -> (%8800:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), 
uuid=664)]) + linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=664), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=659), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=661), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=661), )] (%8800:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=664)], %8795:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=659)], %8798:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=661)]) -> (%8801:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=661)]) + linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=661), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=665), )] (%8801:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=661)]) -> (%8802:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=665)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=665), 
inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=48), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=666), )] (%8802:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=665)], %8792:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=48)]) -> (%8803:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=666)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=666), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=666), )] (%8803:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=666)]) -> (%8804:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=666)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=666), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=666), )] (%8804:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=666)]) -> (%8804:tensor<[1, 32, 2048], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=666)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=666), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=668), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=667))] (%8804:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=666)]) -> (%8805:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=668)]) + cf.ReturnOp (%8805:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=668)], %8786:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=656)], %8788:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=658)]) -> () } } graph.SubGraphOp @model.layers.17.mlp [using_qnn:true, symbol:model.layers.17.mlp] { - (%1112:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=668)]) -> (%1117:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=676)]) { - linalg.CPU.LinearOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=668), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=671), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=670))] (%1112:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=668)]) -> (%1113:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=671)]) - linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=671), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=672), )] (%1113:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=671)]) -> (%1114:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=672)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=668), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=674), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=673))] (%1112:tensor<[1, 32, 2048], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=668)]) -> (%1115:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=674)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=672), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=674), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=672), )] (%1114:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=672)], %1115:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=674)]) -> (%1116:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=672)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=672), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=676), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=675))] (%1116:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=672)]) -> (%1117:tensor<[1, 32, 2048], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=676)]) - cf.ReturnOp (%1117:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=676)]) -> () + (%8807:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=669)]) -> (%8812:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=677)]) { + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=669), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=672), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=671))] (%8807:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=669)]) -> (%8808:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=672)]) + linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=672), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=673), )] (%8808:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: 
Float32), uuid=672)]) -> (%8809:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=673)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=669), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=675), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=674))] (%8807:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=669)]) -> (%8810:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=675)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=673), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=675), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=673), )] (%8809:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=673)], %8810:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=675)]) -> (%8811:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=673)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=673), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=677), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=676))] (%8811:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=673)]) -> (%8812:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=677)]) + cf.ReturnOp (%8812:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=677)]) -> () } } graph.SubGraphOp @model.layers.18 [using_qnn:true, symbol:model.layers.18] { - (%1118:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=676)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %356:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=21)], %357:tensor<[1, 8, 992, 128], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=49)]) -> (%1159:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=710)], %1132:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=689)], %1134:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=691)]) { - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=676), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=677), )] (%1118:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=676)]) -> (%1119:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=677)]) - graph.CallGraphOp @model.layers.18.self_attn (%1119:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=677)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], 
%356:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=21)], %357:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=49)]) -> (%1151:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=701)], %1132:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=689)], %1134:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=691)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=701), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=676), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=701), )] (%1151:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=701)], %1118:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=676)]) -> (%1152:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=701)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: 
Float32), uuid=701), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=702), )] (%1152:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=701)]) -> (%1153:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=702)]) - graph.CallGraphOp @model.layers.18.mlp (%1153:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=702)]) -> (%1158:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=710)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=710), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=701), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=710), )] (%1158:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=710)], %1152:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=701)]) -> (%1159:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=710)]) - cf.ReturnOp (%1159:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=710)], %1132:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=689)], %1134:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=691)]) -> () + (%8813:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=677)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8051:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=21)], %8052:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=49)]) -> (%8854:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=711)], %8827:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=690)], %8829:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=692)]) { + linalg.CPU.RMSNormOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=677), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=678), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=679))] (%8813:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=677)]) -> (%8814:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=678)]) + graph.CallGraphOp @model.layers.18.self_attn (%8814:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=678)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8051:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=21)], %8052:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=49)]) -> (%8846:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=702)], %8827:tensor<[1, 8, 128, 
32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=690)], %8829:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=692)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=702), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=677), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=702), )] (%8846:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=702)], %8813:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=677)]) -> (%8847:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=702)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=702), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=703), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=704))] (%8847:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=702)]) -> (%8848:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 
-32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=703)]) + graph.CallGraphOp @model.layers.18.mlp (%8848:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=703)]) -> (%8853:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=711)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=711), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=702), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=711), )] (%8853:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=711)], %8847:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=702)]) -> (%8854:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=711)]) + cf.ReturnOp (%8854:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=711)], %8827:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=690)], %8829:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=692)]) -> () } } 
graph.SubGraphOp @model.layers.18.self_attn [using_qnn:true, symbol:model.layers.18.self_attn] { - (%1119:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=677)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %356:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=21)], %357:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=49)]) -> (%1151:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=701)], %1132:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=689)], %1134:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=691)]) { - linalg.CPU.LinearOp (%1119:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=677)]) -> (%1120:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=683)]) - linalg.CPU.LinearOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=677), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=680), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=679))] (%1119:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=677)]) -> (%1121:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=680)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=677), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=682), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=681))] (%1119:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=677)]) -> (%1122:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=682)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=683), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=683), )] (%1120:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=683)]) -> (%1120:tensor<[1, 32, 16, 128], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=683)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=683), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=683), )] (%1120:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=683)]) -> (%1123:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=683)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=680), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=680), )] (%1121:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=680)]) -> (%1121:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=680)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=680), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=680), )] (%1121:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=680)]) -> (%1124:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=680)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=682), 
outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=682), )] (%1122:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=682)]) -> (%1122:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=682)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=682), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=682), )] (%1122:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=682)]) -> (%1125:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=682)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=683), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=684), )] (%1123:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=683)]) -> (%1126:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=684)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=680), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=686), )] (%1124:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=680)]) -> (%1127:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=686)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=684), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=684), )] (%1126:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=684)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%1128:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=684)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=686), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=686), )] (%1127:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=686)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%1129:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=686)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=686), outputs_0:QuantSpec(Raw(type: Float16), uuid=688), )] (%1129:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=686)]) -> (%1130:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=688)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=688), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=689), )] (%1130:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=688)]) -> (%1131:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=689)]) - linalg.CPU.TransposeOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=689), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=689), )] (%1131:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=689)]) -> (%1132:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=689)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=682), outputs_0:QuantSpec(Raw(type: Float16), uuid=690), )] (%1125:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=682)]) -> (%1133:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=690)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=690), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=691), )] (%1133:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=690)]) -> (%1134:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=691)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=21), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=689), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 
127, quant_to_type: Int8, scale_type: Float32), uuid=21), )] (%356:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=21)], %1132:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=689)]) -> (%1135:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=21)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=49), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=691), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=49), )] (%357:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=49)], %1134:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=691)]) -> (%1136:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=49)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=21), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=21), )] (%1135:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=21)]) -> 
(%1137:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=21)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=49), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=49), )] (%1136:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=49)]) -> (%1138:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=49)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=684), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=21), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=692), )] (%1128:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=684)], %1137:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=21)]) -> (%1139:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=692)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=692), inputs_1:QuantSpec(Raw(type: 
Float32), uuid=693), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=692), )] (%1139:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=692)], %1140:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=693), constant:[0.088388346]]) -> (%1141:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=692)]) - linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=692), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=694), )] (%1141:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=692)]) -> (%1142:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=694)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=694), inputs_1:QuantSpec(Raw(type: Int16), uuid=695), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=694), )] (%1142:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=694)], %1143:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=695), constant:[-20]]) -> (%1144:tensor<[1, 16, 32, 1], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=694)]) - linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=696), outputs_0:QuantSpec(Raw(type: UInt8), uuid=697), )] (%319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %1145:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=696), constant:[0.24023438]]) -> (%1146:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=697)]) - linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=697), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=692), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=694), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=694), )] (%1146:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=697)], %1141:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=692)], %1144:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=694)]) -> (%1147:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=694)]) - linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=694), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 
32767, quant_to_type: Int16, scale_type: Float32), uuid=698), )] (%1147:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=694)]) -> (%1148:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=698)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=698), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=49), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=699), )] (%1148:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=698)], %1138:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=49)]) -> (%1149:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=699)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=699), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=699), )] (%1149:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=699)]) -> (%1150:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=699)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=699), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=699), )] (%1150:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=699)]) -> (%1150:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=699)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=699), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=701), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=700))] (%1150:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=699)]) -> (%1151:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=701)]) - cf.ReturnOp (%1151:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=701)], %1132:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=689)], %1134:tensor<[1, 8, 32, 128], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=691)]) -> () + (%8814:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=678)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8051:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=21)], %8052:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=49)]) -> (%8846:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=702)], %8827:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=690)], %8829:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=692)]) { + linalg.CPU.LinearOp (%8814:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=678)]) -> (%8815:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=684)]) + linalg.CPU.LinearOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=678), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=681), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=680))] (%8814:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=678)]) -> (%8816:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=681)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=678), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=683), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=682))] (%8814:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=678)]) -> (%8817:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=683)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=684), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=684), )] (%8815:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=684)]) -> (%8815:tensor<[1, 32, 16, 128], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=684)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=684), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=684), )] (%8815:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=684)]) -> (%8818:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=684)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=681), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=681), )] (%8816:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=681)]) -> (%8816:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=681)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=681), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=681), )] (%8816:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=681)]) -> (%8819:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=681)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=683), 
outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=683), )] (%8817:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=683)]) -> (%8817:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=683)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=683), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=683), )] (%8817:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=683)]) -> (%8820:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=683)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=684), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=685), weight_weight:QuantSpec(Raw(type: Int16PerTensor), uuid=686))] (%8818:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=684)]) -> (%8821:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=685)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=681), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=687), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=688))] (%8819:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=681)]) -> (%8822:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=687)]) + linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=685), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=685), )] (%8821:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=685)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%8823:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=685)]) + linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=687), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=687), )] (%8822:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=687)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%8824:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=687)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=687), outputs_0:QuantSpec(Raw(type: Float16), uuid=689), )] (%8824:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=687)]) -> (%8825:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=689)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=689), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=690), )] (%8825:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=689)]) -> (%8826:tensor<[1, 8, 32, 128], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=690)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=690), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=690), )] (%8826:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=690)]) -> (%8827:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=690)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=683), outputs_0:QuantSpec(Raw(type: Float16), uuid=691), )] (%8820:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=683)]) -> (%8828:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=691)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=691), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=692), )] (%8828:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=691)]) -> (%8829:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=692)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=21), 
inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=690), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=21), )] (%8051:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=21)], %8827:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=690)]) -> (%8830:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=21)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=49), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=692), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=49), )] (%8052:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=49)], %8829:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=692)]) -> (%8831:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=49)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=21), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=21), )] 
(%8830:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=21)]) -> (%8832:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=21)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=49), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=49), )] (%8831:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=49)]) -> (%8833:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=49)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=685), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=21), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=693), )] (%8823:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=685)], %8832:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=21)]) -> (%8834:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=693)]) + linalg.CPU.MulOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=693), inputs_1:QuantSpec(Raw(type: Float32), uuid=694), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=693), )] (%8834:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=693)], %8835:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=694), constant:[0.088388346]]) -> (%8836:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=693)]) + linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=693), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=695), )] (%8836:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=693)]) -> (%8837:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=695)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=695), inputs_1:QuantSpec(Raw(type: Int16), uuid=696), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=695), )] (%8837:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: 
Float32), uuid=695)], %8838:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=696), constant:[-20]]) -> (%8839:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=695)]) + linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=697), outputs_0:QuantSpec(Raw(type: UInt8), uuid=698), )] (%8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8840:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=697), constant:[0.24023438]]) -> (%8841:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=698)]) + linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=698), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=693), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=695), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=695), )] (%8841:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=698)], %8836:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=693)], %8839:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=695)]) -> (%8842:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=695)]) + linalg.CPU.SoftmaxOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=695), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=699), )] (%8842:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=695)]) -> (%8843:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=699)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=699), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=49), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=700), )] (%8843:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=699)], %8833:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=49)]) -> (%8844:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=700)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=700), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=700), )] (%8844:tensor<[1, 16, 32, 128], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=700)]) -> (%8845:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=700)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=700), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=700), )] (%8845:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=700)]) -> (%8845:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=700)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=700), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=702), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=701))] (%8845:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=700)]) -> (%8846:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=702)]) + cf.ReturnOp (%8846:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=702)], %8827:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=690)], %8829:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=692)]) -> () } } graph.SubGraphOp @model.layers.18.mlp [using_qnn:true, symbol:model.layers.18.mlp] { - (%1153:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=702)]) -> (%1158:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=710)]) { - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=702), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=705), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=704))] (%1153:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=702)]) -> (%1154:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=705)]) - linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=705), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=706), )] (%1154:tensor<[1, 32, 6144], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=705)]) -> (%1155:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=706)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=702), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=708), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=707))] (%1153:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=702)]) -> (%1156:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=708)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=706), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=708), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=706), )] (%1155:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=706)], %1156:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=708)]) -> (%1157:tensor<[1, 32, 6144], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=706)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=706), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=710), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=709))] (%1157:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=706)]) -> (%1158:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=710)]) - cf.ReturnOp (%1158:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=710)]) -> () + (%8848:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=703)]) -> (%8853:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=711)]) { + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=703), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=706), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), 
uuid=705))] (%8848:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=703)]) -> (%8849:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=706)]) + linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=706), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=707), )] (%8849:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=706)]) -> (%8850:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=707)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=703), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=709), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=708))] (%8848:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=703)]) -> (%8851:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=709)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=707), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=709), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=707), )] (%8850:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=707)], %8851:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=709)]) -> (%8852:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=707)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=707), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=711), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=710))] (%8852:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=707)]) -> (%8853:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=711)]) + cf.ReturnOp (%8853:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=711)]) -> () } } graph.SubGraphOp @model.layers.19 [using_qnn:true, symbol:model.layers.19] { - (%1159:tensor<[1, 32, 2048], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=710)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %358:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=22)], %359:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=50)]) -> (%1200:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=744)], %1173:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=723)], %1175:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=725)]) { - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=710), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=711), )] (%1159:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=710)]) -> (%1160:tensor<[1, 32, 2048], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=711)]) - graph.CallGraphOp @model.layers.19.self_attn (%1160:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=711)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %358:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=22)], %359:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=50)]) -> (%1192:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=735)], %1173:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=723)], %1175:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=725)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=735), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=710), 
outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=735), )] (%1192:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=735)], %1159:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=710)]) -> (%1193:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=735)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=735), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=736), )] (%1193:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=735)]) -> (%1194:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=736)]) - graph.CallGraphOp @model.layers.19.mlp (%1194:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=736)]) -> (%1199:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=744)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=744), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, 
scale_type: Float32), uuid=735), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=744), )] (%1199:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=744)], %1193:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=735)]) -> (%1200:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=744)]) - cf.ReturnOp (%1200:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=744)], %1173:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=723)], %1175:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=725)]) -> () + (%8854:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=711)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8053:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, 
quant_to_type: Int8, scale_type: Float32), uuid=22)], %8054:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=50)]) -> (%8895:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=745)], %8868:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=724)], %8870:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=726)]) { + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=711), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=712), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=713))] (%8854:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=711)]) -> (%8855:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=712)]) + graph.CallGraphOp @model.layers.19.self_attn (%8855:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=712)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8053:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=22)], %8054:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=50)]) -> (%8887:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=736)], %8868:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=724)], %8870:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=726)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=736), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=711), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=736), )] (%8887:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=736)], %8854:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=711)]) -> (%8888:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=736)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=736), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=737), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=738))] (%8888:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=736)]) -> (%8889:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=737)]) + graph.CallGraphOp @model.layers.19.mlp (%8889:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=737)]) -> (%8894:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=745)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=745), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=736), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=745), )] (%8894:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=745)], %8888:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=736)]) -> (%8895:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=745)]) + cf.ReturnOp (%8895:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=745)], %8868:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=724)], %8870:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=726)]) -> () } } graph.SubGraphOp @model.layers.19.self_attn [using_qnn:true, symbol:model.layers.19.self_attn] { - (%1160:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=711)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %358:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=22)], %359:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=50)]) -> (%1192:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: 
Int16, scale_type: Float32), uuid=735)], %1173:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=723)], %1175:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=725)]) { - linalg.CPU.LinearOp (%1160:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=711)]) -> (%1161:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=717)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=711), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=714), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=713))] (%1160:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=711)]) -> (%1162:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=714)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=711), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=716), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: 
Float32), uuid=715))] (%1160:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=711)]) -> (%1163:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=716)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=717), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=717), )] (%1161:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=717)]) -> (%1161:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=717)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=717), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=717), )] (%1161:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=717)]) -> (%1164:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=717)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=714), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=714), )] (%1162:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=714)]) -> (%1162:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=714)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, 
scale_type: Float32), uuid=714), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=714), )] (%1162:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=714)]) -> (%1165:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=714)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=716), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=716), )] (%1163:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=716)]) -> (%1163:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=716)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=716), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=716), )] (%1163:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=716)]) -> (%1166:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=716)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=717), 
outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=718), )] (%1164:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=717)]) -> (%1167:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=718)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=714), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=720), )] (%1165:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=714)]) -> (%1168:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=720)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=718), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=718), )] (%1167:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=718)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%1169:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=718)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=720), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=720), )] (%1168:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=720)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%1170:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=720)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=720), outputs_0:QuantSpec(Raw(type: Float16), uuid=722), )] (%1170:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=720)]) -> (%1171:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=722)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=722), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=723), )] (%1171:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=722)]) -> (%1172:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=723)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=723), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=723), )] (%1172:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=723)]) -> (%1173:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=723)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=716), outputs_0:QuantSpec(Raw(type: Float16), uuid=724), )] (%1166:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=716)]) -> (%1174:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=724)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=724), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, 
quant_to_type: Int8, scale_type: Float32), uuid=725), )] (%1174:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=724)]) -> (%1175:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=725)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=22), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=723), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=22), )] (%358:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=22)], %1173:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=723)]) -> (%1176:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=22)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=50), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=725), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=50), )] (%359:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=50)], %1175:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, 
scale_type: Float32), uuid=725)]) -> (%1177:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=50)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=22), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=22), )] (%1176:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=22)]) -> (%1178:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=22)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=50), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=50), )] (%1177:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=50)]) -> (%1179:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=50)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=718), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=22), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=726), )] (%1169:tensor<[1, 16, 32, 128], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=718)], %1178:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=22)]) -> (%1180:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=726)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=726), inputs_1:QuantSpec(Raw(type: Float32), uuid=727), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=726), )] (%1180:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=726)], %1181:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=727), constant:[0.088388346]]) -> (%1182:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=726)]) - linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=726), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=728), )] (%1182:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=726)]) -> (%1183:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=728)]) 
- linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=728), inputs_1:QuantSpec(Raw(type: Int16), uuid=729), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=728), )] (%1183:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=728)], %1184:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=729), constant:[-20]]) -> (%1185:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=728)]) - linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=730), outputs_0:QuantSpec(Raw(type: UInt8), uuid=731), )] (%319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %1186:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=730), constant:[0.55078125]]) -> (%1187:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=731)]) - linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=731), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=726), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=728), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=728), )] (%1187:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=731)], %1182:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=726)], %1185:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=728)]) -> (%1188:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=728)]) - linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=728), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=732), )] (%1188:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=728)]) -> (%1189:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=732)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=732), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=50), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=733), )] (%1189:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=732)], %1179:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=50)]) -> (%1190:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 
-32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=733)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=733), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=733), )] (%1190:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=733)]) -> (%1191:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=733)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=733), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=733), )] (%1191:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=733)]) -> (%1191:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=733)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=733), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=735), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=734))] (%1191:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=733)]) -> (%1192:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=735)]) - cf.ReturnOp (%1192:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=735)], %1173:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=723)], %1175:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=725)]) -> () + (%8855:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=712)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8053:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=22)], %8054:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=50)]) -> (%8887:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=736)], %8868:tensor<[1, 8, 128, 32], 
Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=724)], %8870:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=726)]) { + linalg.CPU.LinearOp (%8855:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=712)]) -> (%8856:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=718)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=712), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=715), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=714))] (%8855:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=712)]) -> (%8857:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=715)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=712), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=717), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=716))] (%8855:tensor<[1, 32, 2048], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=712)]) -> (%8858:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=717)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=718), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=718), )] (%8856:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=718)]) -> (%8856:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=718)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=718), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=718), )] (%8856:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=718)]) -> (%8859:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=718)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=715), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=715), )] (%8857:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=715)]) -> (%8857:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=715)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=715), 
outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=715), )] (%8857:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=715)]) -> (%8860:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=715)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=717), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=717), )] (%8858:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=717)]) -> (%8858:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=717)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=717), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=717), )] (%8858:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=717)]) -> (%8861:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=717)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=718), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=719), weight_weight:QuantSpec(Raw(type: Int16PerTensor), uuid=720))] (%8859:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=718)]) -> (%8862:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=719)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=715), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=721), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=722))] (%8860:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=715)]) -> (%8863:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=721)]) + linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=719), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=719), )] (%8862:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=719)], %8074:tensor<[1, 32, 128], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%8864:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=719)]) + linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=721), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=721), )] (%8863:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=721)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%8865:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=721)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=721), outputs_0:QuantSpec(Raw(type: Float16), 
uuid=723), )] (%8865:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=721)]) -> (%8866:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=723)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=723), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=724), )] (%8866:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=723)]) -> (%8867:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=724)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=724), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=724), )] (%8867:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=724)]) -> (%8868:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=724)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=717), outputs_0:QuantSpec(Raw(type: Float16), uuid=725), )] (%8861:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=717)]) -> (%8869:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=725)]) + linalg.CPU.CastTypeOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=725), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=726), )] (%8869:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=725)]) -> (%8870:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=726)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=22), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=724), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=22), )] (%8053:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=22)], %8868:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=724)]) -> (%8871:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=22)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=50), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=726), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=50), )] (%8054:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), 
uuid=50)], %8870:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=726)]) -> (%8872:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=50)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=22), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=22), )] (%8871:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=22)]) -> (%8873:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=22)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=50), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=50), )] (%8872:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=50)]) -> (%8874:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=50)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=719), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=22), outputs_0:QuantSpec(SymPerTensor(quant_min: 
-32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=727), )] (%8864:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=719)], %8873:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=22)]) -> (%8875:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=727)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=727), inputs_1:QuantSpec(Raw(type: Float32), uuid=728), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=727), )] (%8875:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=727)], %8876:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=728), constant:[0.088388346]]) -> (%8877:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=727)]) + linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=727), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=729), )] (%8877:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=727)]) -> (%8878:tensor<[1, 16, 32, 1], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=729)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=729), inputs_1:QuantSpec(Raw(type: Int16), uuid=730), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=729), )] (%8878:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=729)], %8879:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=730), constant:[-20]]) -> (%8880:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=729)]) + linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=731), outputs_0:QuantSpec(Raw(type: UInt8), uuid=732), )] (%8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8881:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=731), constant:[0.55078125]]) -> (%8882:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=732)]) + linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=732), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=727), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=729), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=729), )] (%8882:tensor<[1, 1, 32, 1024], UInt8, 
CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=732)], %8877:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=727)], %8880:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=729)]) -> (%8883:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=729)]) + linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=729), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=733), )] (%8883:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=729)]) -> (%8884:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=733)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=733), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=50), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=734), )] (%8884:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=733)], %8874:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, 
quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=50)]) -> (%8885:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=734)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=734), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=734), )] (%8885:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=734)]) -> (%8886:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=734)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=734), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=734), )] (%8886:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=734)]) -> (%8886:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=734)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=734), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=736), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, 
scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=735))] (%8886:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=734)]) -> (%8887:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=736)]) + cf.ReturnOp (%8887:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=736)], %8868:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=724)], %8870:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=726)]) -> () } } graph.SubGraphOp @model.layers.19.mlp [using_qnn:true, symbol:model.layers.19.mlp] { - (%1194:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=736)]) -> (%1199:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=744)]) { - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=736), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=739), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=738))] (%1194:tensor<[1, 32, 2048], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=736)]) -> (%1195:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=739)]) - linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=739), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=740), )] (%1195:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=739)]) -> (%1196:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=740)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=736), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=742), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=741))] (%1194:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=736)]) -> (%1197:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=742)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=740), 
inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=742), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=740), )] (%1196:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=740)], %1197:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=742)]) -> (%1198:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=740)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=740), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=744), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=743))] (%1198:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=740)]) -> (%1199:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=744)]) - cf.ReturnOp (%1199:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=744)]) -> () + (%8889:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=737)]) -> (%8894:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=745)]) { + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=737), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=740), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=739))] (%8889:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=737)]) -> (%8890:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=740)]) + linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=740), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=741), )] (%8890:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=740)]) -> (%8891:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=741)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=737), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=743), 
weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=742))] (%8889:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=737)]) -> (%8892:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=743)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=741), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=743), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=741), )] (%8891:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=741)], %8892:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=743)]) -> (%8893:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=741)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=741), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=745), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=744))] (%8893:tensor<[1, 32, 6144], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=741)]) -> (%8894:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=745)]) + cf.ReturnOp (%8894:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=745)]) -> () } } graph.SubGraphOp @model.layers.20 [using_qnn:true, symbol:model.layers.20] { - (%1200:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=744)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %360:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=23)], %361:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=51)]) -> (%1241:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=778)], %1214:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=757)], %1216:tensor<[1, 8, 32, 128], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=759)]) { - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=744), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=745), )] (%1200:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=744)]) -> (%1201:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=745)]) - graph.CallGraphOp @model.layers.20.self_attn (%1201:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=745)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %360:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=23)], %361:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=51)]) -> (%1233:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=769)], 
%1214:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=757)], %1216:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=759)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=769), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=744), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=769), )] (%1233:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=769)], %1200:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=744)]) -> (%1234:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=769)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=769), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=770), )] (%1234:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=769)]) -> (%1235:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=770)]) - graph.CallGraphOp 
@model.layers.20.mlp (%1235:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=770)]) -> (%1240:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=778)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=778), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=769), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=778), )] (%1240:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=778)], %1234:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=769)]) -> (%1241:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=778)]) - cf.ReturnOp (%1241:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=778)], %1214:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=757)], %1216:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=759)]) -> () + (%8895:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=745)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8055:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=23)], %8056:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=51)]) -> (%8936:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=779)], %8909:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=758)], %8911:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=760)]) { + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=745), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=746), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=747))] (%8895:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: 
Float32), uuid=745)]) -> (%8896:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=746)]) + graph.CallGraphOp @model.layers.20.self_attn (%8896:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=746)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8055:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=23)], %8056:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=51)]) -> (%8928:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=770)], %8909:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=758)], %8911:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=760)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=770), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=745), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=770), )] (%8928:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=770)], %8895:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=745)]) -> (%8929:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=770)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=770), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=771), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=772))] (%8929:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=770)]) -> (%8930:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=771)]) + graph.CallGraphOp @model.layers.20.mlp (%8930:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=771)]) -> (%8935:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=779)]) + linalg.CPU.AddOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=779), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=770), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=779), )] (%8935:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=779)], %8929:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=770)]) -> (%8936:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=779)]) + cf.ReturnOp (%8936:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=779)], %8909:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=758)], %8911:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=760)]) -> () } } graph.SubGraphOp @model.layers.20.self_attn [using_qnn:true, symbol:model.layers.20.self_attn] { - (%1201:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=745)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %360:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=23)], %361:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=51)]) -> (%1233:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=769)], %1214:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=757)], %1216:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=759)]) { - linalg.CPU.LinearOp (%1201:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=745)]) -> (%1202:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=751)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=745), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=748), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=747))] (%1201:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=745)]) -> (%1203:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=748)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=745), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=750), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=749))] (%1201:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=745)]) -> (%1204:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=750)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=751), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=751), )] (%1202:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=751)]) -> (%1202:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=751)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=751), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=751), )] (%1202:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=751)]) -> (%1205:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=751)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=748), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=748), )] (%1203:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=748)]) -> (%1203:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=748)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=748), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=748), )] (%1203:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=748)]) -> (%1206:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=748)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=750), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=750), )] (%1204:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=750)]) -> (%1204:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=750)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 
-32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=750), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=750), )] (%1204:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=750)]) -> (%1207:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=750)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=751), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=752), )] (%1205:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=751)]) -> (%1208:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=752)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=748), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=754), )] (%1206:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=748)]) -> (%1209:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=754)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=752), inputs_1:QuantSpec(SymPerTensor(quant_min: 
-32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=752), )] (%1208:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=752)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%1210:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=752)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=754), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=754), )] (%1209:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=754)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%1211:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=754)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=754), outputs_0:QuantSpec(Raw(type: Float16), uuid=756), )] (%1211:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=754)]) -> (%1212:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=756)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=756), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=757), )] (%1212:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=756)]) -> (%1213:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=757)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=757), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=757), )] (%1213:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=757)]) -> (%1214:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=757)]) - 
linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=750), outputs_0:QuantSpec(Raw(type: Float16), uuid=758), )] (%1207:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=750)]) -> (%1215:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=758)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=758), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=759), )] (%1215:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=758)]) -> (%1216:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=759)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=23), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=757), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=23), )] (%360:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=23)], %1214:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=757)]) -> (%1217:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=23)]) - linalg.CPU.ConcatOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=51), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=759), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=51), )] (%361:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=51)], %1216:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=759)]) -> (%1218:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=51)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=23), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=23), )] (%1217:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=23)]) -> (%1219:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=23)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=51), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=51), )] (%1218:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, 
scale_type: Float32), uuid=51)]) -> (%1220:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=51)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=752), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=23), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=760), )] (%1210:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=752)], %1219:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=23)]) -> (%1221:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=760)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=760), inputs_1:QuantSpec(Raw(type: Float32), uuid=761), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=760), )] (%1221:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=760)], %1222:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=761), constant:[0.088388346]]) -> (%1223:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=760)]) - linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=760), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=762), )] (%1223:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=760)]) -> (%1224:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=762)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=762), inputs_1:QuantSpec(Raw(type: Int16), uuid=763), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=762), )] (%1224:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=762)], %1225:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=763), constant:[-20]]) -> (%1226:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=762)]) - linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=764), outputs_0:QuantSpec(Raw(type: UInt8), uuid=765), )] (%319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %1227:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=764), constant:[0.71875]]) -> (%1228:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=765)]) - 
linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=765), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=760), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=762), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=762), )] (%1228:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=765)], %1223:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=760)], %1226:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=762)]) -> (%1229:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=762)]) - linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=762), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=766), )] (%1229:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=762)]) -> (%1230:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=766)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=766), 
inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=51), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=767), )] (%1230:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=766)], %1220:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=51)]) -> (%1231:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=767)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=767), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=767), )] (%1231:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=767)]) -> (%1232:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=767)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=767), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=767), )] (%1232:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=767)]) -> (%1232:tensor<[1, 32, 2048], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=767)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=767), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=769), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=768))] (%1232:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=767)]) -> (%1233:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=769)]) - cf.ReturnOp (%1233:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=769)], %1214:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=757)], %1216:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=759)]) -> () + (%8896:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=746)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8055:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=23)], %8056:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=51)]) -> (%8928:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=770)], %8909:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=758)], %8911:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=760)]) { + linalg.CPU.LinearOp (%8896:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=746)]) -> (%8897:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=752)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=746), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=749), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=748))] (%8896:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=746)]) 
-> (%8898:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=749)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=746), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=751), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=750))] (%8896:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=746)]) -> (%8899:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=751)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=752), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=752), )] (%8897:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=752)]) -> (%8897:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=752)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=752), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=752), )] (%8897:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=752)]) -> (%8900:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=752)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=749), 
outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=749), )] (%8898:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=749)]) -> (%8898:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=749)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=749), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=749), )] (%8898:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=749)]) -> (%8901:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=749)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=751), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=751), )] (%8899:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=751)]) -> (%8899:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=751)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=751), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=751), )] (%8899:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=751)]) -> (%8902:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=751)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=752), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=753), weight_weight:QuantSpec(Raw(type: Int16PerTensor), uuid=754))] (%8900:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=752)]) -> (%8903:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=753)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=749), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=755), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=756))] (%8901:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=749)]) -> (%8904:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=755)]) + linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=753), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=753), )] (%8903:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=753)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%8905:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=753)]) + linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=755), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=755), )] (%8904:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=755)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%8906:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=755)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=755), outputs_0:QuantSpec(Raw(type: Float16), uuid=757), )] (%8906:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=755)]) -> (%8907:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=757)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=757), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=758), )] (%8907:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=757)]) -> (%8908:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=758)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=758), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=758), )] (%8908:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=758)]) -> (%8909:tensor<[1, 8, 128, 32], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=758)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=751), outputs_0:QuantSpec(Raw(type: Float16), uuid=759), )] (%8902:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=751)]) -> (%8910:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=759)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=759), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=760), )] (%8910:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=759)]) -> (%8911:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=760)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=23), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=758), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=23), )] (%8055:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=23)], %8909:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=758)]) -> (%8912:tensor<[1, 8, 128, 1024], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=23)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=51), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=760), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=51), )] (%8056:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=51)], %8911:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=760)]) -> (%8913:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=51)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=23), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=23), )] (%8912:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=23)]) -> (%8914:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=23)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=51), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), 
uuid=51), )] (%8913:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=51)]) -> (%8915:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=51)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=753), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=23), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=761), )] (%8905:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=753)], %8914:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=23)]) -> (%8916:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=761)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=761), inputs_1:QuantSpec(Raw(type: Float32), uuid=762), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=761), )] (%8916:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=761)], %8917:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=762), constant:[0.088388346]]) -> 
(%8918:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=761)]) + linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=761), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=763), )] (%8918:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=761)]) -> (%8919:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=763)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=763), inputs_1:QuantSpec(Raw(type: Int16), uuid=764), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=763), )] (%8919:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=763)], %8920:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=764), constant:[-20]]) -> (%8921:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=763)]) + linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=765), outputs_0:QuantSpec(Raw(type: UInt8), uuid=766), )] (%8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8922:tensor<[1], UInt16, 
CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=765), constant:[0.71875]]) -> (%8923:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=766)]) + linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=766), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=761), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=763), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=763), )] (%8923:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=766)], %8918:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=761)], %8921:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=763)]) -> (%8924:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=763)]) + linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=763), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=767), )] (%8924:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=763)]) -> (%8925:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=767)]) + linalg.CPU.MatMulOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=767), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=51), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=768), )] (%8925:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=767)], %8915:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=51)]) -> (%8926:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=768)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=768), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=768), )] (%8926:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=768)]) -> (%8927:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=768)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=768), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=768), )] (%8927:tensor<[1, 32, 16, 128], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=768)]) -> (%8927:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=768)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=768), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=770), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=769))] (%8927:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=768)]) -> (%8928:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=770)]) + cf.ReturnOp (%8928:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=770)], %8909:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=758)], %8911:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=760)]) -> () } } graph.SubGraphOp @model.layers.20.mlp [using_qnn:true, symbol:model.layers.20.mlp] { - (%1235:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=770)]) -> (%1240:tensor<[1, 32, 2048], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=778)]) { - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=770), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=773), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=772))] (%1235:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=770)]) -> (%1236:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=773)]) - linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=773), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=774), )] (%1236:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=773)]) -> (%1237:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=774)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=770), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=776), weight_weight:QuantSpec(LPBQ(quant_min: -8, 
quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=775))] (%1235:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=770)]) -> (%1238:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=776)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=774), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=776), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=774), )] (%1237:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=774)], %1238:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=776)]) -> (%1239:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=774)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=774), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=778), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=777))] (%1239:tensor<[1, 32, 6144], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=774)]) -> (%1240:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=778)]) - cf.ReturnOp (%1240:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=778)]) -> () + (%8930:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=771)]) -> (%8935:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=779)]) { + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=771), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=774), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=773))] (%8930:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=771)]) -> (%8931:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=774)]) + linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=774), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: 
Float32), uuid=775), )] (%8931:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=774)]) -> (%8932:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=775)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=771), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=777), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=776))] (%8930:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=771)]) -> (%8933:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=777)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=775), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=777), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=775), )] (%8932:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=775)], %8933:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=777)]) -> (%8934:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=775)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=775), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=779), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=778))] (%8934:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=775)]) -> (%8935:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=779)]) + cf.ReturnOp (%8935:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=779)]) -> () } } graph.SubGraphOp @model.layers.21 [using_qnn:true, symbol:model.layers.21] { - (%1241:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=778)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %362:tensor<[1, 8, 128, 992], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=24)], %363:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=52)]) -> (%1282:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=812)], %1255:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=791)], %1257:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=793)]) { - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=778), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=779), )] (%1241:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=778)]) -> (%1242:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=779)]) - graph.CallGraphOp @model.layers.21.self_attn (%1242:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=779)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 
-32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %362:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=24)], %363:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=52)]) -> (%1274:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=803)], %1255:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=791)], %1257:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=793)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=803), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=778), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=803), )] (%1274:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=803)], %1241:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=778)]) -> (%1275:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: 
Float32), uuid=803)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=803), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=804), )] (%1275:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=803)]) -> (%1276:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=804)]) - graph.CallGraphOp @model.layers.21.mlp (%1276:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=804)]) -> (%1281:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=812)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=812), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=803), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=812), )] (%1281:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=812)], %1275:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=803)]) -> (%1282:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=812)]) - cf.ReturnOp (%1282:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=812)], %1255:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=791)], %1257:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=793)]) -> () + (%8936:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=779)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8057:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=24)], %8058:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=52)]) -> (%8977:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=813)], %8950:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=792)], %8952:tensor<[1, 8, 32, 128], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=794)]) { + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=779), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=780), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=781))] (%8936:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=779)]) -> (%8937:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=780)]) + graph.CallGraphOp @model.layers.21.self_attn (%8937:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=780)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8057:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=24)], %8058:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=52)]) -> (%8969:tensor<[1, 32, 2048], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=804)], %8950:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=792)], %8952:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=794)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=804), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=779), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=804), )] (%8969:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=804)], %8936:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=779)]) -> (%8970:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=804)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=804), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=805), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=806))] (%8970:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 
32767, quant_to_type: Int16, scale_type: Float32), uuid=804)]) -> (%8971:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=805)]) + graph.CallGraphOp @model.layers.21.mlp (%8971:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=805)]) -> (%8976:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=813)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=813), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=804), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=813), )] (%8976:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=813)], %8970:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=804)]) -> (%8977:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=813)]) + cf.ReturnOp (%8977:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=813)], %8950:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=792)], %8952:tensor<[1, 8, 
32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=794)]) -> () } } graph.SubGraphOp @model.layers.21.self_attn [using_qnn:true, symbol:model.layers.21.self_attn] { - (%1242:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=779)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %362:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=24)], %363:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=52)]) -> (%1274:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=803)], %1255:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=791)], %1257:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=793)]) { - linalg.CPU.LinearOp (%1242:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=779)]) -> (%1243:tensor<[1, 32, 2048], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=785)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=779), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=782), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=781))] (%1242:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=779)]) -> (%1244:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=782)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=779), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=784), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=783))] (%1242:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=779)]) -> (%1245:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=784)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=785), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=785), )] (%1243:tensor<[1, 32, 2048], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=785)]) -> (%1243:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=785)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=785), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=785), )] (%1243:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=785)]) -> (%1246:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=785)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=782), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=782), )] (%1244:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=782)]) -> (%1244:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=782)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=782), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=782), )] (%1244:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=782)]) -> (%1247:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=782)]) - linalg.CPU.ViewOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=784), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=784), )] (%1245:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=784)]) -> (%1245:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=784)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=784), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=784), )] (%1245:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=784)]) -> (%1248:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=784)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=785), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=786), )] (%1246:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=785)]) -> (%1249:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=786)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, 
scale_type: Float32), uuid=782), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=788), )] (%1247:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=782)]) -> (%1250:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=788)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=786), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=786), )] (%1249:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=786)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%1251:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=786)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=788), 
inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=788), )] (%1250:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=788)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%1252:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=788)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=788), outputs_0:QuantSpec(Raw(type: Float16), uuid=790), )] (%1252:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=788)]) -> (%1253:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=790)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=790), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=791), )] (%1253:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=790)]) -> (%1254:tensor<[1, 8, 32, 128], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=791)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=791), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=791), )] (%1254:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=791)]) -> (%1255:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=791)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=784), outputs_0:QuantSpec(Raw(type: Float16), uuid=792), )] (%1248:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=784)]) -> (%1256:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=792)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=792), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=793), )] (%1256:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=792)]) -> (%1257:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=793)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=24), 
inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=791), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=24), )] (%362:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=24)], %1255:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=791)]) -> (%1258:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=24)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=52), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=793), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=52), )] (%363:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=52)], %1257:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=793)]) -> (%1259:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=52)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=24), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=24), )] 
(%1258:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=24)]) -> (%1260:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=24)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=52), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=52), )] (%1259:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=52)]) -> (%1261:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=52)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=786), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=24), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=794), )] (%1251:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=786)], %1260:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=24)]) -> (%1262:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=794)]) - linalg.CPU.MulOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=794), inputs_1:QuantSpec(Raw(type: Float32), uuid=795), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=794), )] (%1262:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=794)], %1263:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=795), constant:[0.088388346]]) -> (%1264:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=794)]) - linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=794), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=796), )] (%1264:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=794)]) -> (%1265:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=796)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=796), inputs_1:QuantSpec(Raw(type: Int16), uuid=797), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=796), )] (%1265:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: 
Float32), uuid=796)], %1266:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=797), constant:[-20]]) -> (%1267:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=796)]) - linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=798), outputs_0:QuantSpec(Raw(type: UInt8), uuid=799), )] (%319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %1268:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=798), constant:[-0.80859375]]) -> (%1269:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=799)]) - linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=799), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=794), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=796), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=796), )] (%1269:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=799)], %1264:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=794)], %1267:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=796)]) -> (%1270:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=796)]) - linalg.CPU.SoftmaxOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=796), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=800), )] (%1270:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=796)]) -> (%1271:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=800)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=800), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=52), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=801), )] (%1271:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=800)], %1261:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=52)]) -> (%1272:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=801)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=801), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=801), )] (%1272:tensor<[1, 16, 32, 128], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=801)]) -> (%1273:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=801)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=801), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=801), )] (%1273:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=801)]) -> (%1273:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=801)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=801), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=803), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=802))] (%1273:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=801)]) -> (%1274:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=803)]) - cf.ReturnOp (%1274:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=803)], %1255:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=791)], %1257:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=793)]) -> () + (%8937:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=780)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8057:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=24)], %8058:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=52)]) -> (%8969:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=804)], %8950:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=792)], %8952:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=794)]) { + linalg.CPU.LinearOp (%8937:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 
-32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=780)]) -> (%8938:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=786)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=780), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=783), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=782))] (%8937:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=780)]) -> (%8939:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=783)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=780), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=785), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=784))] (%8937:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=780)]) -> (%8940:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=785)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=786), 
outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=786), )] (%8938:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=786)]) -> (%8938:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=786)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=786), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=786), )] (%8938:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=786)]) -> (%8941:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=786)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=783), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=783), )] (%8939:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=783)]) -> (%8939:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=783)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=783), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=783), )] (%8939:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=783)]) -> (%8942:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, 
scale_type: Float32), uuid=783)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=785), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=785), )] (%8940:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=785)]) -> (%8940:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=785)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=785), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=785), )] (%8940:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=785)]) -> (%8943:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=785)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=786), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=787), weight_weight:QuantSpec(Raw(type: Int16PerTensor), uuid=788))] (%8941:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=786)]) -> (%8944:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=787)]) + linalg.CPU.RMSNormOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=783), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=789), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=790))] (%8942:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=783)]) -> (%8945:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=789)]) + linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=787), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=787), )] (%8944:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=787)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%8946:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: 
Int16, scale_type: Float32), uuid=787)]) + linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=789), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=789), )] (%8945:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=789)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%8947:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=789)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=789), outputs_0:QuantSpec(Raw(type: Float16), uuid=791), )] (%8947:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=789)]) -> (%8948:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=791)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=791), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, 
quant_to_type: Int8, scale_type: Float32), uuid=792), )] (%8948:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=791)]) -> (%8949:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=792)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=792), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=792), )] (%8949:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=792)]) -> (%8950:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=792)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=785), outputs_0:QuantSpec(Raw(type: Float16), uuid=793), )] (%8943:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=785)]) -> (%8951:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=793)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=793), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=794), )] (%8951:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=793)]) -> (%8952:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), 
uuid=794)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=24), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=792), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=24), )] (%8057:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=24)], %8950:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=792)]) -> (%8953:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=24)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=52), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=794), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=52), )] (%8058:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=52)], %8952:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=794)]) -> (%8954:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=52)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, 
quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=24), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=24), )] (%8953:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=24)]) -> (%8955:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=24)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=52), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=52), )] (%8954:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=52)]) -> (%8956:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=52)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=787), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=24), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=795), )] (%8946:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=787)], %8955:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=24)]) -> (%8957:tensor<[1, 16, 
32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=795)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=795), inputs_1:QuantSpec(Raw(type: Float32), uuid=796), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=795), )] (%8957:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=795)], %8958:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=796), constant:[0.088388346]]) -> (%8959:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=795)]) + linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=795), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=797), )] (%8959:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=795)]) -> (%8960:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=797)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=797), inputs_1:QuantSpec(Raw(type: Int16), uuid=798), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=797), )] (%8960:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=797)], %8961:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=798), constant:[-20]]) -> (%8962:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=797)]) + linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=799), outputs_0:QuantSpec(Raw(type: UInt8), uuid=800), )] (%8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8963:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=799), constant:[-0.80859375]]) -> (%8964:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=800)]) + linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=800), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=795), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=797), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=797), )] (%8964:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=800)], %8959:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=795)], %8962:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=797)]) -> (%8965:tensor<[1, 16, 32, 1024], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=797)]) + linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=797), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=801), )] (%8965:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=797)]) -> (%8966:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=801)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=801), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=52), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=802), )] (%8966:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=801)], %8956:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=52)]) -> (%8967:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=802)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=802), outputs_0:QuantSpec(SymPerTensor(quant_min: 
-32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=802), )] (%8967:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=802)]) -> (%8968:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=802)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=802), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=802), )] (%8968:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=802)]) -> (%8968:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=802)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=802), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=804), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=803))] (%8968:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=802)]) -> (%8969:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=804)]) + cf.ReturnOp (%8969:tensor<[1, 32, 2048], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=804)], %8950:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=792)], %8952:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=794)]) -> () } } graph.SubGraphOp @model.layers.21.mlp [using_qnn:true, symbol:model.layers.21.mlp] { - (%1276:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=804)]) -> (%1281:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=812)]) { - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=804), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=807), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=806))] (%1276:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=804)]) -> (%1277:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=807)]) - linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=807), 
outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=808), )] (%1277:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=807)]) -> (%1278:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=808)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=804), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=810), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=809))] (%1276:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=804)]) -> (%1279:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=810)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=808), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=810), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=808), )] (%1278:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=808)], %1279:tensor<[1, 32, 6144], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=810)]) -> (%1280:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=808)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=808), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=812), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=811))] (%1280:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=808)]) -> (%1281:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=812)]) - cf.ReturnOp (%1281:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=812)]) -> () + (%8971:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=805)]) -> (%8976:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=813)]) { + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=805), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: 
Float32), uuid=808), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=807))] (%8971:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=805)]) -> (%8972:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=808)]) + linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=808), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=809), )] (%8972:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=808)]) -> (%8973:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=809)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=805), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=811), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=810))] (%8971:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=805)]) -> (%8974:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=811)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=809), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=811), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=809), )] (%8973:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=809)], %8974:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=811)]) -> (%8975:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=809)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=809), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=813), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=812))] (%8975:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=809)]) -> (%8976:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=813)]) + cf.ReturnOp (%8976:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=813)]) -> () } } graph.SubGraphOp @model.layers.22 [using_qnn:true, symbol:model.layers.22] { - (%1282:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=812)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %364:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=25)], %365:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=53)]) -> (%1323:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=846)], %1296:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=825)], %1298:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=827)]) { - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=812), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=813), )] (%1282:tensor<[1, 32, 2048], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=812)]) -> (%1283:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=813)]) - graph.CallGraphOp @model.layers.22.self_attn (%1283:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=813)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %364:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=25)], %365:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=53)]) -> (%1315:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=837)], %1296:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=825)], %1298:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=827)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: 
Int16, scale_type: Float32), uuid=837), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=812), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=837), )] (%1315:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=837)], %1282:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=812)]) -> (%1316:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=837)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=837), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=838), )] (%1316:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=837)]) -> (%1317:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=838)]) - graph.CallGraphOp @model.layers.22.mlp (%1317:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=838)]) -> (%1322:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=846)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=846), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=837), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=846), )] (%1322:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=846)], %1316:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=837)]) -> (%1323:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=846)]) - cf.ReturnOp (%1323:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=846)], %1296:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=825)], %1298:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=827)]) -> () + (%8977:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=813)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, 
CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8059:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=25)], %8060:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=53)]) -> (%9018:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=847)], %8991:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=826)], %8993:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=828)]) { + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=813), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=814), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=815))] (%8977:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=813)]) -> (%8978:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=814)]) + graph.CallGraphOp @model.layers.22.self_attn (%8978:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=814)], %8074:tensor<[1, 32, 128], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8059:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=25)], %8060:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=53)]) -> (%9010:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=838)], %8991:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=826)], %8993:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=828)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=838), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=813), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=838), )] (%9010:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=838)], %8977:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=813)]) -> (%9011:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=838)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=838), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=839), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=840))] (%9011:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=838)]) -> (%9012:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=839)]) + graph.CallGraphOp @model.layers.22.mlp (%9012:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=839)]) -> (%9017:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=847)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=847), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=838), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=847), )] (%9017:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=847)], %9011:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=838)]) -> (%9018:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=847)]) + cf.ReturnOp (%9018:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=847)], %8991:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=826)], %8993:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=828)]) -> () } } graph.SubGraphOp @model.layers.22.self_attn [using_qnn:true, symbol:model.layers.22.self_attn] { - (%1283:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=813)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %364:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=25)], %365:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: 
Int8, scale_type: Float32), uuid=53)]) -> (%1315:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=837)], %1296:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=825)], %1298:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=827)]) { - linalg.CPU.LinearOp (%1283:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=813)]) -> (%1284:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=819)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=813), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=816), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=815))] (%1283:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=813)]) -> (%1285:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=816)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=813), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, 
scale_type: Float32), uuid=818), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=817))] (%1283:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=813)]) -> (%1286:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=818)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=819), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=819), )] (%1284:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=819)]) -> (%1284:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=819)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=819), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=819), )] (%1284:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=819)]) -> (%1287:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=819)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=816), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=816), )] (%1285:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=816)]) -> (%1285:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: 
Int16, scale_type: Float32), uuid=816)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=816), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=816), )] (%1285:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=816)]) -> (%1288:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=816)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=818), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=818), )] (%1286:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=818)]) -> (%1286:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=818)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=818), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=818), )] (%1286:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=818)]) -> (%1289:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=818)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=819), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=820), )] (%1287:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=819)]) -> (%1290:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=820)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=816), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=822), )] (%1288:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=816)]) -> (%1291:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=822)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=820), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=820), )] (%1290:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=820)], 
%379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%1292:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=820)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=822), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=822), )] (%1291:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=822)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%1293:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=822)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=822), 
outputs_0:QuantSpec(Raw(type: Float16), uuid=824), )] (%1293:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=822)]) -> (%1294:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=824)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=824), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=825), )] (%1294:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=824)]) -> (%1295:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=825)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=825), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=825), )] (%1295:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=825)]) -> (%1296:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=825)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=818), outputs_0:QuantSpec(Raw(type: Float16), uuid=826), )] (%1289:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=818)]) -> (%1297:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), 
uuid=826)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=826), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=827), )] (%1297:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=826)]) -> (%1298:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=827)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=25), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=825), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=25), )] (%364:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=25)], %1296:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=825)]) -> (%1299:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=25)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=53), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=827), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=53), )] (%365:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, 
quant_to_type: Int8, scale_type: Float32), uuid=53)], %1298:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=827)]) -> (%1300:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=53)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=25), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=25), )] (%1299:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=25)]) -> (%1301:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=25)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=53), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=53), )] (%1300:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=53)]) -> (%1302:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=53)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=820), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=25), 
outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=828), )] (%1292:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=820)], %1301:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=25)]) -> (%1303:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=828)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=828), inputs_1:QuantSpec(Raw(type: Float32), uuid=829), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=828), )] (%1303:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=828)], %1304:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=829), constant:[0.088388346]]) -> (%1305:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=828)]) - linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=828), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=830), )] (%1305:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=828)]) -> 
(%1306:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=830)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=830), inputs_1:QuantSpec(Raw(type: Int16), uuid=831), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=830), )] (%1306:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=830)], %1307:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=831), constant:[-20]]) -> (%1308:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=830)]) - linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=832), outputs_0:QuantSpec(Raw(type: UInt8), uuid=833), )] (%319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %1309:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=832), constant:[-0.42773438]]) -> (%1310:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=833)]) - linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=833), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=828), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=830), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=830), )] (%1310:tensor<[1, 1, 32, 
1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=833)], %1305:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=828)], %1308:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=830)]) -> (%1311:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=830)]) - linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=830), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=834), )] (%1311:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=830)]) -> (%1312:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=834)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=834), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=53), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=835), )] (%1312:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=834)], %1302:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 
-128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=53)]) -> (%1313:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=835)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=835), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=835), )] (%1313:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=835)]) -> (%1314:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=835)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=835), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=835), )] (%1314:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=835)]) -> (%1314:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=835)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=835), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=837), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, 
scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=836))] (%1314:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=835)]) -> (%1315:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=837)]) - cf.ReturnOp (%1315:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=837)], %1296:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=825)], %1298:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=827)]) -> () + (%8978:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=814)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8059:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=25)], %8060:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=53)]) -> (%9010:tensor<[1, 32, 
2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=838)], %8991:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=826)], %8993:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=828)]) { + linalg.CPU.LinearOp (%8978:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=814)]) -> (%8979:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=820)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=814), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=817), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=816))] (%8978:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=814)]) -> (%8980:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=817)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=814), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=819), weight_weight:QuantSpec(LPBQ(quant_min: 
-8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=818))] (%8978:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=814)]) -> (%8981:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=819)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=820), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=820), )] (%8979:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=820)]) -> (%8979:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=820)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=820), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=820), )] (%8979:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=820)]) -> (%8982:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=820)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=817), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=817), )] (%8980:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=817)]) -> (%8980:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=817)]) + linalg.CPU.TransposeOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=817), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=817), )] (%8980:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=817)]) -> (%8983:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=817)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=819), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=819), )] (%8981:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=819)]) -> (%8981:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=819)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=819), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=819), )] (%8981:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=819)]) -> (%8984:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=819)]) + 
linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=820), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=821), weight_weight:QuantSpec(Raw(type: Int16PerTensor), uuid=822))] (%8982:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=820)]) -> (%8985:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=821)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=817), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=823), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=824))] (%8983:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=817)]) -> (%8986:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=823)]) + linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=821), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=821), )] (%8985:tensor<[1, 16, 32, 128], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=821)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%8987:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=821)]) + linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=823), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=823), )] (%8986:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=823)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%8988:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=823)]) + linalg.CPU.CastTypeOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=823), outputs_0:QuantSpec(Raw(type: Float16), uuid=825), )] (%8988:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=823)]) -> (%8989:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=825)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=825), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=826), )] (%8989:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=825)]) -> (%8990:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=826)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=826), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=826), )] (%8990:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=826)]) -> (%8991:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=826)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=819), outputs_0:QuantSpec(Raw(type: Float16), uuid=827), )] (%8984:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 
32767, quant_to_type: Int16, scale_type: Float32), uuid=819)]) -> (%8992:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=827)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=827), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=828), )] (%8992:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=827)]) -> (%8993:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=828)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=25), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=826), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=25), )] (%8059:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=25)], %8991:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=826)]) -> (%8994:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=25)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=53), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=828), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: 
Float32), uuid=53), )] (%8060:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=53)], %8993:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=828)]) -> (%8995:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=53)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=25), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=25), )] (%8994:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=25)]) -> (%8996:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=25)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=53), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=53), )] (%8995:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=53)]) -> (%8997:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=53)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, 
scale_type: Float32), uuid=821), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=25), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=829), )] (%8987:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=821)], %8996:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=25)]) -> (%8998:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=829)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=829), inputs_1:QuantSpec(Raw(type: Float32), uuid=830), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=829), )] (%8998:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=829)], %8999:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=830), constant:[0.088388346]]) -> (%9000:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=829)]) + linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=829), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=831), )] (%9000:tensor<[1, 16, 32, 1024], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=829)]) -> (%9001:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=831)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=831), inputs_1:QuantSpec(Raw(type: Int16), uuid=832), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=831), )] (%9001:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=831)], %9002:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=832), constant:[-20]]) -> (%9003:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=831)]) + linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=833), outputs_0:QuantSpec(Raw(type: UInt8), uuid=834), )] (%8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %9004:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=833), constant:[-0.42773438]]) -> (%9005:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=834)]) + linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=834), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=829), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=831), 
outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=831), )] (%9005:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=834)], %9000:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=829)], %9003:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=831)]) -> (%9006:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=831)]) + linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=831), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=835), )] (%9006:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=831)]) -> (%9007:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=835)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=835), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=53), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=836), )] (%9007:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=835)], %8997:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=53)]) -> (%9008:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=836)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=836), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=836), )] (%9008:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=836)]) -> (%9009:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=836)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=836), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=836), )] (%9009:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=836)]) -> (%9009:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=836)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=836), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 
32767, quant_to_type: Int16, scale_type: Float32), uuid=838), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=837))] (%9009:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=836)]) -> (%9010:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=838)]) + cf.ReturnOp (%9010:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=838)], %8991:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=826)], %8993:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=828)]) -> () } } graph.SubGraphOp @model.layers.22.mlp [using_qnn:true, symbol:model.layers.22.mlp] { - (%1317:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=838)]) -> (%1322:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=846)]) { - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=838), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=841), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, 
scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=840))] (%1317:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=838)]) -> (%1318:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=841)]) - linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=841), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=842), )] (%1318:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=841)]) -> (%1319:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=842)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=838), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=844), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=843))] (%1317:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=838)]) -> (%1320:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=844)]) - linalg.CPU.MulOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=842), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=844), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=842), )] (%1319:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=842)], %1320:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=844)]) -> (%1321:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=842)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=842), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=846), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=845))] (%1321:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=842)]) -> (%1322:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=846)]) - cf.ReturnOp (%1322:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=846)]) -> () + (%9012:tensor<[1, 
32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=839)]) -> (%9017:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=847)]) { + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=839), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=842), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=841))] (%9012:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=839)]) -> (%9013:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=842)]) + linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=842), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=843), )] (%9013:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=842)]) -> (%9014:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=843)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: 
Float32), uuid=839), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=845), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=844))] (%9012:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=839)]) -> (%9015:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=845)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=843), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=845), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=843), )] (%9014:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=843)], %9015:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=845)]) -> (%9016:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=843)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=843), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=847), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 
7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=846))] (%9016:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=843)]) -> (%9017:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=847)]) + cf.ReturnOp (%9017:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=847)]) -> () } } graph.SubGraphOp @model.layers.23 [using_qnn:true, symbol:model.layers.23] { - (%1323:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=846)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %366:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=26)], %367:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=54)]) -> (%1364:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=880)], %1337:tensor<[1, 8, 128, 32], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=859)], %1339:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=861)]) { - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=846), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=847), )] (%1323:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=846)]) -> (%1324:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=847)]) - graph.CallGraphOp @model.layers.23.self_attn (%1324:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=847)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %366:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=26)], %367:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=54)]) -> 
(%1356:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=871)], %1337:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=859)], %1339:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=861)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=871), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=846), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=871), )] (%1356:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=871)], %1323:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=846)]) -> (%1357:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=871)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=871), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=872), )] (%1357:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=871)]) -> (%1358:tensor<[1, 32, 
2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=872)]) - graph.CallGraphOp @model.layers.23.mlp (%1358:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=872)]) -> (%1363:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=880)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=880), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=871), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=880), )] (%1363:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=880)], %1357:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=871)]) -> (%1364:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=880)]) - cf.ReturnOp (%1364:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=880)], %1337:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=859)], %1339:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, 
quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=861)]) -> () + (%9018:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=847)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8061:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=26)], %8062:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=54)]) -> (%9059:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=881)], %9032:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=860)], %9034:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=862)]) { + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=847), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=848), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: 
Float32), uuid=849))] (%9018:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=847)]) -> (%9019:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=848)]) + graph.CallGraphOp @model.layers.23.self_attn (%9019:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=848)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8061:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=26)], %8062:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=54)]) -> (%9051:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=872)], %9032:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=860)], %9034:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=862)]) + linalg.CPU.AddOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=872), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=847), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=872), )] (%9051:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=872)], %9018:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=847)]) -> (%9052:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=872)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=872), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=873), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=874))] (%9052:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=872)]) -> (%9053:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=873)]) + graph.CallGraphOp @model.layers.23.mlp (%9053:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=873)]) -> (%9058:tensor<[1, 32, 2048], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=881)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=881), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=872), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=881), )] (%9058:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=881)], %9052:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=872)]) -> (%9059:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=881)]) + cf.ReturnOp (%9059:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=881)], %9032:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=860)], %9034:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=862)]) -> () } } graph.SubGraphOp @model.layers.23.self_attn [using_qnn:true, symbol:model.layers.23.self_attn] { - (%1324:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=847)], %379:tensor<[1, 32, 128], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %366:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=26)], %367:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=54)]) -> (%1356:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=871)], %1337:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=859)], %1339:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=861)]) { - linalg.CPU.LinearOp (%1324:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=847)]) -> (%1325:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=853)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=847), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=850), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, 
scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=849))] (%1324:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=847)]) -> (%1326:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=850)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=847), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=852), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=851))] (%1324:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=847)]) -> (%1327:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=852)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=853), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=853), )] (%1325:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=853)]) -> (%1325:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=853)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=853), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=853), )] (%1325:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=853)]) -> (%1328:tensor<[1, 16, 32, 
128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=853)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=850), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=850), )] (%1326:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=850)]) -> (%1326:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=850)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=850), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=850), )] (%1326:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=850)]) -> (%1329:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=850)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=852), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=852), )] (%1327:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=852)]) -> (%1327:tensor<[1, 32, 8, 128], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=852)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=852), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=852), )] (%1327:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=852)]) -> (%1330:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=852)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=853), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=854), )] (%1328:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=853)]) -> (%1331:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=854)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=850), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=856), )] (%1329:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=850)]) -> (%1332:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: 
Float32), uuid=856)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=854), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=854), )] (%1331:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=854)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%1333:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=854)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=856), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=856), )] (%1332:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: 
Float32), uuid=856)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%1334:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=856)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=856), outputs_0:QuantSpec(Raw(type: Float16), uuid=858), )] (%1334:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=856)]) -> (%1335:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=858)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=858), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=859), )] (%1335:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=858)]) -> (%1336:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=859)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=859), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=859), )] (%1336:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, 
quant_to_type: Int8, scale_type: Float32), uuid=859)]) -> (%1337:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=859)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=852), outputs_0:QuantSpec(Raw(type: Float16), uuid=860), )] (%1330:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=852)]) -> (%1338:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=860)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=860), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=861), )] (%1338:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=860)]) -> (%1339:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=861)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=26), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=859), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=26), )] (%366:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=26)], %1337:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: 
Float32), uuid=859)]) -> (%1340:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=26)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=54), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=861), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=54), )] (%367:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=54)], %1339:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=861)]) -> (%1341:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=54)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=26), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=26), )] (%1340:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=26)]) -> (%1342:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=26)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=54), 
outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=54), )] (%1341:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=54)]) -> (%1343:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=54)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=854), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=26), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=862), )] (%1333:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=854)], %1342:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=26)]) -> (%1344:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=862)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=862), inputs_1:QuantSpec(Raw(type: Float32), uuid=863), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=862), )] (%1344:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=862)], %1345:tensor<[1], 
Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=863), constant:[0.088388346]]) -> (%1346:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=862)]) - linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=862), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=864), )] (%1346:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=862)]) -> (%1347:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=864)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=864), inputs_1:QuantSpec(Raw(type: Int16), uuid=865), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=864), )] (%1347:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=864)], %1348:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=865), constant:[-20]]) -> (%1349:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=864)]) - linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=866), outputs_0:QuantSpec(Raw(type: UInt8), uuid=867), )] (%319:tensor<[1, 1, 32, 1024], 
UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %1350:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=866), constant:[0.96484375]]) -> (%1351:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=867)]) - linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=867), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=862), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=864), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=864), )] (%1351:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=867)], %1346:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=862)], %1349:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=864)]) -> (%1352:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=864)]) - linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=864), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=868), )] (%1352:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=864)]) -> (%1353:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=868)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=868), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=54), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=869), )] (%1353:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=868)], %1343:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=54)]) -> (%1354:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=869)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=869), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=869), )] (%1354:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=869)]) -> (%1355:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=869)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=869), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: 
Float32), uuid=869), )] (%1355:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=869)]) -> (%1355:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=869)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=869), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=871), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=870))] (%1355:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=869)]) -> (%1356:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=871)]) - cf.ReturnOp (%1356:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=871)], %1337:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=859)], %1339:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=861)]) -> () + (%9019:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=848)], %8074:tensor<[1, 32, 128], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8061:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=26)], %8062:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=54)]) -> (%9051:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=872)], %9032:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=860)], %9034:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=862)]) { + linalg.CPU.LinearOp (%9019:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=848)]) -> (%9020:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=854)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=848), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=851), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, 
scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=850))] (%9019:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=848)]) -> (%9021:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=851)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=848), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=853), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=852))] (%9019:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=848)]) -> (%9022:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=853)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=854), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=854), )] (%9020:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=854)]) -> (%9020:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=854)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=854), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=854), )] (%9020:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=854)]) -> (%9023:tensor<[1, 16, 32, 
128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=854)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=851), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=851), )] (%9021:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=851)]) -> (%9021:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=851)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=851), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=851), )] (%9021:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=851)]) -> (%9024:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=851)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=853), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=853), )] (%9022:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=853)]) -> (%9022:tensor<[1, 32, 8, 128], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=853)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=853), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=853), )] (%9022:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=853)]) -> (%9025:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=853)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=854), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=855), weight_weight:QuantSpec(Raw(type: Int16PerTensor), uuid=856))] (%9023:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=854)]) -> (%9026:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=855)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=851), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=857), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=858))] (%9024:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: 
Float32), uuid=851)]) -> (%9027:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=857)]) + linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=855), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=855), )] (%9026:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=855)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%9028:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=855)]) + linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=857), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: 
Float32), uuid=857), )] (%9027:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=857)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%9029:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=857)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=857), outputs_0:QuantSpec(Raw(type: Float16), uuid=859), )] (%9029:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=857)]) -> (%9030:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=859)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=859), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=860), )] (%9030:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=859)]) -> (%9031:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=860)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=860), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 
127, quant_to_type: Int8, scale_type: Float32), uuid=860), )] (%9031:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=860)]) -> (%9032:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=860)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=853), outputs_0:QuantSpec(Raw(type: Float16), uuid=861), )] (%9025:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=853)]) -> (%9033:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=861)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=861), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=862), )] (%9033:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=861)]) -> (%9034:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=862)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=26), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=860), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=26), )] (%8061:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, 
scale_type: Float32), uuid=26)], %9032:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=860)]) -> (%9035:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=26)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=54), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=862), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=54), )] (%8062:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=54)], %9034:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=862)]) -> (%9036:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=54)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=26), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=26), )] (%9035:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=26)]) -> (%9037:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=26)]) + linalg.CPU.RepeatOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=54), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=54), )] (%9036:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=54)]) -> (%9038:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=54)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=855), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=26), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=863), )] (%9028:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=855)], %9037:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=26)]) -> (%9039:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=863)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=863), inputs_1:QuantSpec(Raw(type: Float32), uuid=864), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=863), )] (%9039:tensor<[1, 16, 32, 1024], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=863)], %9040:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=864), constant:[0.088388346]]) -> (%9041:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=863)]) + linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=863), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=865), )] (%9041:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=863)]) -> (%9042:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=865)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=865), inputs_1:QuantSpec(Raw(type: Int16), uuid=866), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=865), )] (%9042:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=865)], %9043:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=866), constant:[-20]]) -> (%9044:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=865)]) + linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: 
UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=867), outputs_0:QuantSpec(Raw(type: UInt8), uuid=868), )] (%8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %9045:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=867), constant:[0.96484375]]) -> (%9046:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=868)]) + linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=868), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=863), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=865), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=865), )] (%9046:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=868)], %9041:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=863)], %9044:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=865)]) -> (%9047:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=865)]) + linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=865), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=869), )] (%9047:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, 
scale_type: Float32), uuid=865)]) -> (%9048:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=869)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=869), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=54), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=870), )] (%9048:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=869)], %9038:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=54)]) -> (%9049:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=870)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=870), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=870), )] (%9049:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=870)]) -> (%9050:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=870)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: 
Int16, scale_type: Float32), uuid=870), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=870), )] (%9050:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=870)]) -> (%9050:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=870)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=870), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=872), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=871))] (%9050:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=870)]) -> (%9051:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=872)]) + cf.ReturnOp (%9051:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=872)], %9032:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=860)], %9034:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=862)]) -> () } } graph.SubGraphOp @model.layers.23.mlp [using_qnn:true, symbol:model.layers.23.mlp] 
{ - (%1358:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=872)]) -> (%1363:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=880)]) { - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=872), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=875), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=874))] (%1358:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=872)]) -> (%1359:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=875)]) - linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=875), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=876), )] (%1359:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=875)]) -> (%1360:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=876)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: 
Int16, scale_type: Float32), uuid=872), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=878), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=877))] (%1358:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=872)]) -> (%1361:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=878)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=876), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=878), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=876), )] (%1360:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=876)], %1361:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=878)]) -> (%1362:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=876)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=876), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=880), 
weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=879))] (%1362:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=876)]) -> (%1363:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=880)]) - cf.ReturnOp (%1363:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=880)]) -> () + (%9053:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=873)]) -> (%9058:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=881)]) { + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=873), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=876), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=875))] (%9053:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=873)]) -> (%9054:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=876)]) + linalg.CPU.SiLUOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=876), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=877), )] (%9054:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=876)]) -> (%9055:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=877)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=873), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=879), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=878))] (%9053:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=873)]) -> (%9056:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=879)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=877), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=879), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=877), )] (%9055:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 
-32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=877)], %9056:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=879)]) -> (%9057:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=877)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=877), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=881), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=880))] (%9057:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=877)]) -> (%9058:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=881)]) + cf.ReturnOp (%9058:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=881)]) -> () } } graph.SubGraphOp @model.layers.24 [using_qnn:true, symbol:model.layers.24] { - (%1364:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=880)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %368:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=27)], %369:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=55)]) -> (%1405:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=914)], %1378:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=893)], %1380:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=895)]) { - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=880), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=881), )] (%1364:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=880)]) -> (%1365:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=881)]) - graph.CallGraphOp @model.layers.24.self_attn (%1365:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=881)], 
%379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %368:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=27)], %369:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=55)]) -> (%1397:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=905)], %1378:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=893)], %1380:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=895)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=905), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=880), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=905), )] (%1397:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=905)], %1364:tensor<[1, 32, 2048], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=880)]) -> (%1398:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=905)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=905), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=906), )] (%1398:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=905)]) -> (%1399:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=906)]) - graph.CallGraphOp @model.layers.24.mlp (%1399:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=906)]) -> (%1404:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=914)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=914), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=905), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=914), )] (%1404:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=914)], 
%1398:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=905)]) -> (%1405:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=914)]) - cf.ReturnOp (%1405:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=914)], %1378:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=893)], %1380:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=895)]) -> () + (%9059:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=881)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8063:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=27)], %8064:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=55)]) -> (%9100:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=915)], %9073:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=894)], %9075:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=896)]) { + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=881), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=882), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=883))] (%9059:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=881)]) -> (%9060:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=882)]) + graph.CallGraphOp @model.layers.24.self_attn (%9060:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=882)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8063:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, 
quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=27)], %8064:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=55)]) -> (%9092:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=906)], %9073:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=894)], %9075:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=896)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=906), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=881), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=906), )] (%9092:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=906)], %9059:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=881)]) -> (%9093:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=906)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=906), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: 
Int16, scale_type: Float32), uuid=907), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=908))] (%9093:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=906)]) -> (%9094:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=907)]) + graph.CallGraphOp @model.layers.24.mlp (%9094:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=907)]) -> (%9099:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=915)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=915), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=906), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=915), )] (%9099:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=915)], %9093:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=906)]) -> (%9100:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=915)]) + cf.ReturnOp (%9100:tensor<[1, 32, 2048], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=915)], %9073:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=894)], %9075:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=896)]) -> () } } graph.SubGraphOp @model.layers.24.self_attn [using_qnn:true, symbol:model.layers.24.self_attn] { - (%1365:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=881)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %368:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=27)], %369:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=55)]) -> (%1397:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=905)], %1378:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=893)], %1380:tensor<[1, 8, 32, 128], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=895)]) { - linalg.CPU.LinearOp (%1365:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=881)]) -> (%1366:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=887)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=881), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=884), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=883))] (%1365:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=881)]) -> (%1367:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=884)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=881), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=886), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=885))] (%1365:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=881)]) -> (%1368:tensor<[1, 32, 1024], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=886)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=887), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=887), )] (%1366:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=887)]) -> (%1366:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=887)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=887), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=887), )] (%1366:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=887)]) -> (%1369:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=887)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=884), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=884), )] (%1367:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=884)]) -> (%1367:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=884)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=884), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=884), )] (%1367:tensor<[1, 32, 8, 128], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=884)]) -> (%1370:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=884)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=886), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=886), )] (%1368:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=886)]) -> (%1368:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=886)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=886), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=886), )] (%1368:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=886)]) -> (%1371:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=886)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=887), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=888), )] (%1369:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), 
uuid=887)]) -> (%1372:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=888)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=884), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=890), )] (%1370:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=884)]) -> (%1373:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=890)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=888), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=888), )] (%1372:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=888)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%1374:tensor<[1, 16, 32, 128], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=888)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=890), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=890), )] (%1373:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=890)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%1375:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=890)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=890), outputs_0:QuantSpec(Raw(type: Float16), uuid=892), )] (%1375:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=890)]) -> (%1376:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=892)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: 
Float16), uuid=892), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=893), )] (%1376:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=892)]) -> (%1377:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=893)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=893), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=893), )] (%1377:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=893)]) -> (%1378:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=893)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=886), outputs_0:QuantSpec(Raw(type: Float16), uuid=894), )] (%1371:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=886)]) -> (%1379:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=894)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=894), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=895), )] (%1379:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=894)]) -> (%1380:tensor<[1, 8, 32, 128], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=895)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=27), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=893), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=27), )] (%368:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=27)], %1378:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=893)]) -> (%1381:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=27)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=55), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=895), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=55), )] (%369:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=55)], %1380:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=895)]) -> (%1382:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), 
uuid=55)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=27), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=27), )] (%1381:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=27)]) -> (%1383:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=27)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=55), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=55), )] (%1382:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=55)]) -> (%1384:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=55)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=888), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=27), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=896), )] (%1374:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=888)], %1383:tensor<[1, 16, 128, 1024], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=27)]) -> (%1385:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=896)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=896), inputs_1:QuantSpec(Raw(type: Float32), uuid=897), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=896), )] (%1385:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=896)], %1386:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=897), constant:[0.088388346]]) -> (%1387:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=896)]) - linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=896), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=898), )] (%1387:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=896)]) -> (%1388:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=898)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=898), 
inputs_1:QuantSpec(Raw(type: Int16), uuid=899), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=898), )] (%1388:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=898)], %1389:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=899), constant:[-20]]) -> (%1390:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=898)]) - linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=900), outputs_0:QuantSpec(Raw(type: UInt8), uuid=901), )] (%319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %1391:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=900), constant:[0.07910156]]) -> (%1392:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=901)]) - linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=901), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=896), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=898), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=898), )] (%1392:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=901)], %1387:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=896)], %1390:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=898)]) -> (%1393:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=898)]) - linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=898), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=902), )] (%1393:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=898)]) -> (%1394:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=902)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=902), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=55), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=903), )] (%1394:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=902)], %1384:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=55)]) -> (%1395:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=903)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 
-32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=903), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=903), )] (%1395:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=903)]) -> (%1396:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=903)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=903), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=903), )] (%1396:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=903)]) -> (%1396:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=903)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=903), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=905), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=904))] (%1396:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=903)]) -> (%1397:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 
-32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=905)]) - cf.ReturnOp (%1397:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=905)], %1378:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=893)], %1380:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=895)]) -> () + (%9060:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=882)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8063:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=27)], %8064:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=55)]) -> (%9092:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=906)], %9073:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=894)], %9075:tensor<[1, 8, 32, 128], 
Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=896)]) { + linalg.CPU.LinearOp (%9060:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=882)]) -> (%9061:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=888)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=882), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=885), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=884))] (%9060:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=882)]) -> (%9062:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=885)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=882), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=887), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=886))] (%9060:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=882)]) -> (%9063:tensor<[1, 32, 1024], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=887)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=888), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=888), )] (%9061:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=888)]) -> (%9061:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=888)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=888), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=888), )] (%9061:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=888)]) -> (%9064:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=888)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=885), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=885), )] (%9062:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=885)]) -> (%9062:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=885)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=885), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=885), )] (%9062:tensor<[1, 32, 8, 128], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=885)]) -> (%9065:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=885)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=887), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=887), )] (%9063:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=887)]) -> (%9063:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=887)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=887), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=887), )] (%9063:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=887)]) -> (%9066:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=887)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=888), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=889), weight_weight:QuantSpec(Raw(type: Int16PerTensor), uuid=890))] (%9064:tensor<[1, 16, 32, 128], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=888)]) -> (%9067:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=889)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=885), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=891), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=892))] (%9065:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=885)]) -> (%9068:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=891)]) + linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=889), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=889), )] (%9067:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=889)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%9069:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=889)]) + linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=891), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=891), )] (%9068:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=891)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%9070:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=891)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=891), outputs_0:QuantSpec(Raw(type: Float16), uuid=893), )] (%9070:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=891)]) -> (%9071:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=893)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=893), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=894), )] (%9071:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=893)]) -> (%9072:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=894)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=894), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=894), )] (%9072:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=894)]) -> (%9073:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=894)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=887), outputs_0:QuantSpec(Raw(type: Float16), uuid=895), )] (%9066:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=887)]) -> (%9074:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=895)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=895), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), 
uuid=896), )] (%9074:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=895)]) -> (%9075:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=896)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=27), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=894), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=27), )] (%8063:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=27)], %9073:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=894)]) -> (%9076:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=27)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=55), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=896), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=55), )] (%8064:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=55)], %9075:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=896)]) -> 
(%9077:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=55)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=27), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=27), )] (%9076:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=27)]) -> (%9078:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=27)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=55), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=55), )] (%9077:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=55)]) -> (%9079:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=55)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=889), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=27), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=897), )] (%9069:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 
-32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=889)], %9078:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=27)]) -> (%9080:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=897)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=897), inputs_1:QuantSpec(Raw(type: Float32), uuid=898), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=897), )] (%9080:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=897)], %9081:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=898), constant:[0.088388346]]) -> (%9082:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=897)]) + linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=897), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=899), )] (%9082:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=897)]) -> (%9083:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=899)]) + linalg.CPU.AddOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=899), inputs_1:QuantSpec(Raw(type: Int16), uuid=900), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=899), )] (%9083:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=899)], %9084:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=900), constant:[-20]]) -> (%9085:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=899)]) + linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=901), outputs_0:QuantSpec(Raw(type: UInt8), uuid=902), )] (%8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %9086:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=901), constant:[0.07910156]]) -> (%9087:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=902)]) + linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=902), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=897), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=899), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=899), )] (%9087:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=902)], %9082:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=897)], %9085:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=899)]) -> (%9088:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=899)]) + linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=899), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=903), )] (%9088:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=899)]) -> (%9089:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=903)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=903), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=55), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=904), )] (%9089:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=903)], %9079:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=55)]) -> (%9090:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 
32767, quant_to_type: Int16, scale_type: Float32), uuid=904)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=904), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=904), )] (%9090:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=904)]) -> (%9091:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=904)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=904), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=904), )] (%9091:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=904)]) -> (%9091:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=904)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=904), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=906), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=905))] (%9091:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=904)]) -> (%9092:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=906)]) + cf.ReturnOp (%9092:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=906)], %9073:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=894)], %9075:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=896)]) -> () } } graph.SubGraphOp @model.layers.24.mlp [using_qnn:true, symbol:model.layers.24.mlp] { - (%1399:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=906)]) -> (%1404:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=914)]) { - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=906), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=909), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=908))] (%1399:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=906)]) -> (%1400:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=909)]) - linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=909), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=910), )] (%1400:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=909)]) -> (%1401:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=910)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=906), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=912), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=911))] (%1399:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=906)]) -> (%1402:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=912)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=910), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=912), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=910), )] 
(%1401:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=910)], %1402:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=912)]) -> (%1403:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=910)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=910), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=914), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=913))] (%1403:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=910)]) -> (%1404:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=914)]) - cf.ReturnOp (%1404:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=914)]) -> () + (%9094:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=907)]) -> (%9099:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=915)]) { + linalg.CPU.LinearOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=907), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=910), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=909))] (%9094:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=907)]) -> (%9095:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=910)]) + linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=910), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=911), )] (%9095:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=910)]) -> (%9096:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=911)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=907), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=913), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=912))] (%9094:tensor<[1, 32, 2048], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=907)]) -> (%9097:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=913)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=911), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=913), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=911), )] (%9096:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=911)], %9097:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=913)]) -> (%9098:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=911)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=911), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=915), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=914))] (%9098:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=911)]) -> (%9099:tensor<[1, 32, 2048], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=915)]) + cf.ReturnOp (%9099:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=915)]) -> () } } graph.SubGraphOp @model.layers.25 [using_qnn:true, symbol:model.layers.25] { - (%1405:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=914)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %370:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=28)], %371:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=56)]) -> (%1446:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=948)], %1419:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=927)], %1421:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=929)]) { - linalg.CPU.RMSNormOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=914), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=915), )] (%1405:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=914)]) -> (%1406:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=915)]) - graph.CallGraphOp @model.layers.25.self_attn (%1406:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=915)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %370:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=28)], %371:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=56)]) -> (%1438:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=939)], %1419:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: 
Float32), uuid=927)], %1421:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=929)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=939), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=914), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=939), )] (%1438:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=939)], %1405:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=914)]) -> (%1439:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=939)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=939), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=940), )] (%1439:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=939)]) -> (%1440:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=940)]) - graph.CallGraphOp @model.layers.25.mlp (%1440:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=940)]) -> (%1445:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=948)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=948), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=939), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=948), )] (%1445:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=948)], %1439:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=939)]) -> (%1446:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=948)]) - cf.ReturnOp (%1446:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=948)], %1419:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=927)], %1421:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=929)]) -> () + (%9100:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=915)], %8074:tensor<[1, 32, 128], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8065:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=28)], %8066:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=56)]) -> (%9141:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=949)], %9114:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=928)], %9116:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=930)]) { + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=915), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=916), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=917))] (%9100:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=915)]) -> (%9101:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 
-32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=916)]) + graph.CallGraphOp @model.layers.25.self_attn (%9101:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=916)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8065:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=28)], %8066:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=56)]) -> (%9133:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=940)], %9114:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=928)], %9116:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=930)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=940), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=915), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 
32767, quant_to_type: Int16, scale_type: Float32), uuid=940), )] (%9133:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=940)], %9100:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=915)]) -> (%9134:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=940)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=940), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=941), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=942))] (%9134:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=940)]) -> (%9135:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=941)]) + graph.CallGraphOp @model.layers.25.mlp (%9135:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=941)]) -> (%9140:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=949)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=949), 
inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=940), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=949), )] (%9140:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=949)], %9134:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=940)]) -> (%9141:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=949)]) + cf.ReturnOp (%9141:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=949)], %9114:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=928)], %9116:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=930)]) -> () } } graph.SubGraphOp @model.layers.25.self_attn [using_qnn:true, symbol:model.layers.25.self_attn] { - (%1406:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=915)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], 
UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %370:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=28)], %371:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=56)]) -> (%1438:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=939)], %1419:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=927)], %1421:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=929)]) { - linalg.CPU.LinearOp (%1406:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=915)]) -> (%1407:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=921)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=915), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=918), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=917))] (%1406:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=915)]) -> (%1408:tensor<[1, 32, 1024], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=918)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=915), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=920), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=919))] (%1406:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=915)]) -> (%1409:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=920)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=921), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=921), )] (%1407:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=921)]) -> (%1407:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=921)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=921), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=921), )] (%1407:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=921)]) -> (%1410:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=921)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=918), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=918), )] (%1408:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=918)]) -> (%1408:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=918)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=918), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=918), )] (%1408:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=918)]) -> (%1411:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=918)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=920), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=920), )] (%1409:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=920)]) -> (%1409:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=920)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=920), outputs_0:QuantSpec(SymPerTensor(quant_min: 
-32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=920), )] (%1409:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=920)]) -> (%1412:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=920)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=921), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=922), )] (%1410:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=921)]) -> (%1413:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=922)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=918), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=924), )] (%1411:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=918)]) -> (%1414:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=924)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=922), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: 
-32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=922), )] (%1413:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=922)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%1415:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=922)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=924), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=924), )] (%1414:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=924)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=64)]) -> (%1416:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=924)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=924), outputs_0:QuantSpec(Raw(type: Float16), uuid=926), )] (%1416:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=924)]) -> (%1417:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=926)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=926), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=927), )] (%1417:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=926)]) -> (%1418:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=927)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=927), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=927), )] (%1418:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=927)]) -> (%1419:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=927)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=920), outputs_0:QuantSpec(Raw(type: Float16), uuid=928), )] (%1412:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=920)]) -> (%1420:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=928)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=928), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=929), )] (%1420:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=928)]) -> (%1421:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=929)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=28), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=927), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=28), )] (%370:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=28)], %1419:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=927)]) -> (%1422:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=28)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), 
uuid=56), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=929), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=56), )] (%371:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=56)], %1421:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=929)]) -> (%1423:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=56)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=28), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=28), )] (%1422:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=28)]) -> (%1424:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=28)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=56), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=56), )] (%1423:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=56)]) -> (%1425:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 
-128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=56)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=922), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=28), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=930), )] (%1415:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=922)], %1424:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=28)]) -> (%1426:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=930)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=930), inputs_1:QuantSpec(Raw(type: Float32), uuid=931), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=930), )] (%1426:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=930)], %1427:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=931), constant:[0.088388346]]) -> (%1428:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=930)]) - linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=930), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=932), )] (%1428:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=930)]) -> (%1429:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=932)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=932), inputs_1:QuantSpec(Raw(type: Int16), uuid=933), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=932), )] (%1429:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=932)], %1430:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=933), constant:[-20]]) -> (%1431:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=932)]) - linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=934), outputs_0:QuantSpec(Raw(type: UInt8), uuid=935), )] (%319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %1432:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=934), constant:[-0.9921875]]) -> (%1433:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=935)]) - linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=935), 
inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=930), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=932), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=932), )] (%1433:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=935)], %1428:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=930)], %1431:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=932)]) -> (%1434:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=932)]) - linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=932), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=936), )] (%1434:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=932)]) -> (%1435:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=936)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=936), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=56), 
outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=937), )] (%1435:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=936)], %1425:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=56)]) -> (%1436:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=937)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=937), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=937), )] (%1436:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=937)]) -> (%1437:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=937)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=937), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=937), )] (%1437:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=937)]) -> (%1437:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=937)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=937), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=939), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=938))] (%1437:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=937)]) -> (%1438:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=939)]) - cf.ReturnOp (%1438:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=939)], %1419:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=927)], %1421:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=929)]) -> () + (%9101:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=916)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, 
CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8065:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=28)], %8066:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=56)]) -> (%9133:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=940)], %9114:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=928)], %9116:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=930)]) { + linalg.CPU.LinearOp (%9101:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=916)]) -> (%9102:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=922)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=916), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=919), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=918))] (%9101:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=916)]) -> (%9103:tensor<[1, 32, 1024], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=919)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=916), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=921), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=920))] (%9101:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=916)]) -> (%9104:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=921)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=922), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=922), )] (%9102:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=922)]) -> (%9102:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=922)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=922), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=922), )] (%9102:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=922)]) -> (%9105:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=922)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=919), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=919), )] (%9103:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=919)]) -> (%9103:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=919)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=919), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=919), )] (%9103:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=919)]) -> (%9106:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=919)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=921), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=921), )] (%9104:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=921)]) -> (%9104:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=921)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=921), outputs_0:QuantSpec(SymPerTensor(quant_min: 
-32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=921), )] (%9104:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=921)]) -> (%9107:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=921)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=922), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=923), weight_weight:QuantSpec(Raw(type: Int16PerTensor), uuid=924))] (%9105:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=922)]) -> (%9108:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=923)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=919), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=925), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=926))] (%9106:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=919)]) -> (%9109:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=925)]) + linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: 
Float32), uuid=923), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=923), )] (%9108:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=923)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%9110:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=923)]) + linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=925), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=925), )] (%9109:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=925)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: 
Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%9111:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=925)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=925), outputs_0:QuantSpec(Raw(type: Float16), uuid=927), )] (%9111:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=925)]) -> (%9112:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=927)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=927), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=928), )] (%9112:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=927)]) -> (%9113:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=928)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=928), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=928), )] (%9113:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=928)]) -> (%9114:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, 
quant_to_type: Int8, scale_type: Float32), uuid=928)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=921), outputs_0:QuantSpec(Raw(type: Float16), uuid=929), )] (%9107:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=921)]) -> (%9115:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=929)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=929), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=930), )] (%9115:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=929)]) -> (%9116:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=930)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=28), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=928), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=28), )] (%8065:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=28)], %9114:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=928)]) -> (%9117:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: 
Float32), uuid=28)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=56), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=930), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=56), )] (%8066:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=56)], %9116:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=930)]) -> (%9118:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=56)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=28), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=28), )] (%9117:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=28)]) -> (%9119:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=28)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=56), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=56), )] (%9118:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 
-128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=56)]) -> (%9120:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=56)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=923), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=28), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=931), )] (%9110:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=923)], %9119:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=28)]) -> (%9121:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=931)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=931), inputs_1:QuantSpec(Raw(type: Float32), uuid=932), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=931), )] (%9121:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=931)], %9122:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=932), constant:[0.088388346]]) -> (%9123:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=931)]) + linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=931), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=933), )] (%9123:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=931)]) -> (%9124:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=933)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=933), inputs_1:QuantSpec(Raw(type: Int16), uuid=934), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=933), )] (%9124:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=933)], %9125:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=934), constant:[-20]]) -> (%9126:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=933)]) + linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=935), outputs_0:QuantSpec(Raw(type: UInt8), uuid=936), )] (%8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %9127:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=935), constant:[-0.9921875]]) -> (%9128:tensor<[1, 1, 32, 1024], UInt8, 
CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=936)]) + linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=936), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=931), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=933), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=933), )] (%9128:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=936)], %9123:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=931)], %9126:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=933)]) -> (%9129:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=933)]) + linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=933), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=937), )] (%9129:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=933)]) -> (%9130:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=937)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, 
scale_type: Float32), uuid=937), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=56), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=938), )] (%9130:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=937)], %9120:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=56)]) -> (%9131:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=938)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=938), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=938), )] (%9131:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=938)]) -> (%9132:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=938)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=938), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=938), )] (%9132:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=938)]) -> 
(%9132:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=938)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=938), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=940), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=939))] (%9132:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=938)]) -> (%9133:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=940)]) + cf.ReturnOp (%9133:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=940)], %9114:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=928)], %9116:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=930)]) -> () } } graph.SubGraphOp @model.layers.25.mlp [using_qnn:true, symbol:model.layers.25.mlp] { - (%1440:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=940)]) -> (%1445:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=948)]) { - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=940), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=943), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=942))] (%1440:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=940)]) -> (%1441:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=943)]) - linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=943), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=944), )] (%1441:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=943)]) -> (%1442:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=944)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=940), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=946), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=945))] 
(%1440:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=940)]) -> (%1443:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=946)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=944), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=946), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=944), )] (%1442:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=944)], %1443:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=946)]) -> (%1444:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=944)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=944), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=948), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=947))] (%1444:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=944)]) -> (%1445:tensor<[1, 
32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=948)]) - cf.ReturnOp (%1445:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=948)]) -> () + (%9135:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=941)]) -> (%9140:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=949)]) { + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=941), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=944), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=943))] (%9135:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=941)]) -> (%9136:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=944)]) + linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=944), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=945), )] (%9136:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: 
Int16, scale_type: Float32), uuid=944)]) -> (%9137:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=945)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=941), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=947), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=946))] (%9135:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=941)]) -> (%9138:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=947)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=945), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=947), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=945), )] (%9137:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=945)], %9138:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=947)]) -> (%9139:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: 
Float32), uuid=945)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=945), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=949), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=948))] (%9139:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=945)]) -> (%9140:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=949)]) + cf.ReturnOp (%9140:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=949)]) -> () } } graph.SubGraphOp @model.layers.26 [using_qnn:true, symbol:model.layers.26] { - (%1446:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=948)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %372:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=29)], %373:tensor<[1, 8, 992, 128], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=57)]) -> (%1487:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=982)], %1460:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=961)], %1462:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=963)]) { - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=948), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=949), )] (%1446:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=948)]) -> (%1447:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=949)]) - graph.CallGraphOp @model.layers.26.self_attn (%1447:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=949)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], 
%372:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=29)], %373:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=57)]) -> (%1479:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=973)], %1460:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=961)], %1462:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=963)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=973), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=948), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=973), )] (%1479:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=973)], %1446:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=948)]) -> (%1480:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=973)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: 
Float32), uuid=973), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=974), )] (%1480:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=973)]) -> (%1481:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=974)]) - graph.CallGraphOp @model.layers.26.mlp (%1481:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=974)]) -> (%1486:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=982)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=982), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=973), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=982), )] (%1486:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=982)], %1480:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=973)]) -> (%1487:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=982)]) - cf.ReturnOp (%1487:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=982)], %1460:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=961)], %1462:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=963)]) -> () + (%9141:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=949)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8067:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=29)], %8068:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=57)]) -> (%9182:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=983)], %9155:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=962)], %9157:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=964)]) { + linalg.CPU.RMSNormOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=949), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=950), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=951))] (%9141:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=949)]) -> (%9142:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=950)]) + graph.CallGraphOp @model.layers.26.self_attn (%9142:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=950)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8067:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=29)], %8068:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=57)]) -> (%9174:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=974)], %9155:tensor<[1, 8, 128, 
32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=962)], %9157:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=964)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=974), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=949), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=974), )] (%9174:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=974)], %9141:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=949)]) -> (%9175:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=974)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=974), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=975), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=976))] (%9175:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=974)]) -> (%9176:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 
-32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=975)]) + graph.CallGraphOp @model.layers.26.mlp (%9176:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=975)]) -> (%9181:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=983)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=983), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=974), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=983), )] (%9181:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=983)], %9175:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=974)]) -> (%9182:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=983)]) + cf.ReturnOp (%9182:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=983)], %9155:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=962)], %9157:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=964)]) -> () } } 
graph.SubGraphOp @model.layers.26.self_attn [using_qnn:true, symbol:model.layers.26.self_attn] { - (%1447:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=949)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %372:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=29)], %373:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=57)]) -> (%1479:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=973)], %1460:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=961)], %1462:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=963)]) { - linalg.CPU.LinearOp (%1447:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=949)]) -> (%1448:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=955)]) - linalg.CPU.LinearOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=949), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=952), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=951))] (%1447:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=949)]) -> (%1449:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=952)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=949), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=954), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=953))] (%1447:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=949)]) -> (%1450:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=954)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=955), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=955), )] (%1448:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=955)]) -> (%1448:tensor<[1, 32, 16, 128], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=955)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=955), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=955), )] (%1448:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=955)]) -> (%1451:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=955)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=952), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=952), )] (%1449:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=952)]) -> (%1449:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=952)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=952), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=952), )] (%1449:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=952)]) -> (%1452:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=952)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=954), 
outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=954), )] (%1450:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=954)]) -> (%1450:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=954)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=954), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=954), )] (%1450:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=954)]) -> (%1453:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=954)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=955), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=956), )] (%1451:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=955)]) -> (%1454:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=956)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=952), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=958), )] (%1452:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=952)]) -> (%1455:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=958)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=956), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=956), )] (%1454:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=956)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%1456:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=956)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=958), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=958), )] (%1455:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=958)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%1457:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=958)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=958), outputs_0:QuantSpec(Raw(type: Float16), uuid=960), )] (%1457:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=958)]) -> (%1458:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=960)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=960), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=961), )] (%1458:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=960)]) -> (%1459:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=961)]) - linalg.CPU.TransposeOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=961), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=961), )] (%1459:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=961)]) -> (%1460:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=961)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=954), outputs_0:QuantSpec(Raw(type: Float16), uuid=962), )] (%1453:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=954)]) -> (%1461:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=962)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=962), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=963), )] (%1461:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=962)]) -> (%1462:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=963)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=29), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=961), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 
127, quant_to_type: Int8, scale_type: Float32), uuid=29), )] (%372:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=29)], %1460:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=961)]) -> (%1463:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=29)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=57), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=963), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=57), )] (%373:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=57)], %1462:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=963)]) -> (%1464:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=57)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=29), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=29), )] (%1463:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=29)]) -> 
(%1465:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=29)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=57), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=57), )] (%1464:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=57)]) -> (%1466:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=57)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=956), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=29), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=964), )] (%1456:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=956)], %1465:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=29)]) -> (%1467:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=964)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=964), inputs_1:QuantSpec(Raw(type: 
Float32), uuid=965), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=964), )] (%1467:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=964)], %1468:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=965), constant:[0.088388346]]) -> (%1469:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=964)]) - linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=964), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=966), )] (%1469:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=964)]) -> (%1470:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=966)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=966), inputs_1:QuantSpec(Raw(type: Int16), uuid=967), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=966), )] (%1470:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=966)], %1471:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=967), constant:[-20]]) -> (%1472:tensor<[1, 16, 32, 1], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=966)]) - linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=968), outputs_0:QuantSpec(Raw(type: UInt8), uuid=969), )] (%319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %1473:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=968), constant:[0.27929688]]) -> (%1474:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=969)]) - linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=969), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=964), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=966), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=966), )] (%1474:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=969)], %1469:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=964)], %1472:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=966)]) -> (%1475:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=966)]) - linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=966), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 
32767, quant_to_type: Int16, scale_type: Float32), uuid=970), )] (%1475:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=966)]) -> (%1476:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=970)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=970), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=57), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=971), )] (%1476:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=970)], %1466:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=57)]) -> (%1477:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=971)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=971), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=971), )] (%1477:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=971)]) -> (%1478:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=971)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=971), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=971), )] (%1478:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=971)]) -> (%1478:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=971)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=971), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=973), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=972))] (%1478:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=971)]) -> (%1479:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=973)]) - cf.ReturnOp (%1479:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=973)], %1460:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=961)], %1462:tensor<[1, 8, 32, 128], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=963)]) -> () + (%9142:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=950)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8067:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=29)], %8068:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=57)]) -> (%9174:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=974)], %9155:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=962)], %9157:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=964)]) { + linalg.CPU.LinearOp (%9142:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=950)]) -> (%9143:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=956)]) + linalg.CPU.LinearOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=950), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=953), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=952))] (%9142:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=950)]) -> (%9144:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=953)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=950), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=955), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=954))] (%9142:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=950)]) -> (%9145:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=955)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=956), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=956), )] (%9143:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=956)]) -> (%9143:tensor<[1, 32, 16, 128], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=956)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=956), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=956), )] (%9143:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=956)]) -> (%9146:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=956)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=953), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=953), )] (%9144:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=953)]) -> (%9144:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=953)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=953), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=953), )] (%9144:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=953)]) -> (%9147:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=953)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=955), 
outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=955), )] (%9145:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=955)]) -> (%9145:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=955)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=955), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=955), )] (%9145:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=955)]) -> (%9148:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=955)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=956), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=957), weight_weight:QuantSpec(Raw(type: Int16PerTensor), uuid=958))] (%9146:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=956)]) -> (%9149:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=957)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=953), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=959), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=960))] (%9147:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=953)]) -> (%9150:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=959)]) + linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=957), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=957), )] (%9149:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=957)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%9151:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=957)]) + linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=959), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=959), )] (%9150:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=959)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%9152:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=959)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=959), outputs_0:QuantSpec(Raw(type: Float16), uuid=961), )] (%9152:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=959)]) -> (%9153:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=961)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=961), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=962), )] (%9153:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=961)]) -> (%9154:tensor<[1, 8, 32, 128], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=962)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=962), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=962), )] (%9154:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=962)]) -> (%9155:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=962)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=955), outputs_0:QuantSpec(Raw(type: Float16), uuid=963), )] (%9148:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=955)]) -> (%9156:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=963)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=963), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=964), )] (%9156:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=963)]) -> (%9157:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=964)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=29), 
inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=962), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=29), )] (%8067:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=29)], %9155:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=962)]) -> (%9158:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=29)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=57), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=964), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=57), )] (%8068:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=57)], %9157:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=964)]) -> (%9159:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=57)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=29), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=29), )] 
(%9158:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=29)]) -> (%9160:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=29)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=57), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=57), )] (%9159:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=57)]) -> (%9161:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=57)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=957), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=29), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=965), )] (%9151:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=957)], %9160:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=29)]) -> (%9162:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=965)]) + linalg.CPU.MulOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=965), inputs_1:QuantSpec(Raw(type: Float32), uuid=966), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=965), )] (%9162:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=965)], %9163:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=966), constant:[0.088388346]]) -> (%9164:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=965)]) + linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=965), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=967), )] (%9164:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=965)]) -> (%9165:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=967)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=967), inputs_1:QuantSpec(Raw(type: Int16), uuid=968), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=967), )] (%9165:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: 
Float32), uuid=967)], %9166:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=968), constant:[-20]]) -> (%9167:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=967)]) + linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=969), outputs_0:QuantSpec(Raw(type: UInt8), uuid=970), )] (%8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %9168:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=969), constant:[0.27929688]]) -> (%9169:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=970)]) + linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=970), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=965), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=967), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=967), )] (%9169:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=970)], %9164:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=965)], %9167:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=967)]) -> (%9170:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=967)]) + linalg.CPU.SoftmaxOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=967), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=971), )] (%9170:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=967)]) -> (%9171:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=971)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=971), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=57), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=972), )] (%9171:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=971)], %9161:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=57)]) -> (%9172:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=972)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=972), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=972), )] (%9172:tensor<[1, 16, 32, 128], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=972)]) -> (%9173:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=972)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=972), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=972), )] (%9173:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=972)]) -> (%9173:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=972)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=972), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=974), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=973))] (%9173:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=972)]) -> (%9174:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=974)]) + cf.ReturnOp (%9174:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=974)], %9155:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=962)], %9157:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=964)]) -> () } } graph.SubGraphOp @model.layers.26.mlp [using_qnn:true, symbol:model.layers.26.mlp] { - (%1481:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=974)]) -> (%1486:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=982)]) { - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=974), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=977), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=976))] (%1481:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=974)]) -> (%1482:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=977)]) - linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=977), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=978), )] (%1482:tensor<[1, 32, 6144], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=977)]) -> (%1483:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=978)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=974), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=980), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=979))] (%1481:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=974)]) -> (%1484:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=980)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=978), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=980), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=978), )] (%1483:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=978)], %1484:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=980)]) -> (%1485:tensor<[1, 32, 6144], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=978)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=978), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=982), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=981))] (%1485:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=978)]) -> (%1486:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=982)]) - cf.ReturnOp (%1486:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=982)]) -> () + (%9176:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=975)]) -> (%9181:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=983)]) { + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=975), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=978), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), 
uuid=977))] (%9176:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=975)]) -> (%9177:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=978)]) + linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=978), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=979), )] (%9177:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=978)]) -> (%9178:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=979)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=975), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=981), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=980))] (%9176:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=975)]) -> (%9179:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=981)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=979), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=981), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=979), )] (%9178:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=979)], %9179:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=981)]) -> (%9180:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=979)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=979), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=983), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=982))] (%9180:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=979)]) -> (%9181:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=983)]) + cf.ReturnOp (%9181:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=983)]) -> () } } graph.SubGraphOp @model.layers.27 [using_qnn:true, symbol:model.layers.27] { - (%1487:tensor<[1, 32, 2048], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=982)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %374:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=30)], %375:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=58)]) -> (%1528:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1016)], %1501:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=995)], %1503:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=997)]) { - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=982), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=983), )] (%1487:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=982)]) -> (%1488:tensor<[1, 32, 2048], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=983)]) - graph.CallGraphOp @model.layers.27.self_attn (%1488:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=983)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %374:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=30)], %375:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=58)]) -> (%1520:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1007)], %1501:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=995)], %1503:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=997)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1007), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=982), 
outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1007), )] (%1520:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1007)], %1487:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=982)]) -> (%1521:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1007)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1007), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1008), )] (%1521:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1007)]) -> (%1522:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1008)]) - graph.CallGraphOp @model.layers.27.mlp (%1522:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1008)]) -> (%1527:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1016)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1016), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=1007), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1016), )] (%1527:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1016)], %1521:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1007)]) -> (%1528:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1016)]) - cf.ReturnOp (%1528:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1016)], %1501:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=995)], %1503:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=997)]) -> () + (%9182:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=983)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8069:tensor<[1, 8, 128, 992], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=30)], %8070:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=58)]) -> (%9223:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1017)], %9196:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=996)], %9198:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=998)]) { + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=983), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=984), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=985))] (%9182:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=983)]) -> (%9183:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=984)]) + graph.CallGraphOp @model.layers.27.self_attn (%9183:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=984)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, 
scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8069:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=30)], %8070:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=58)]) -> (%9215:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1008)], %9196:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=996)], %9198:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=998)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1008), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=983), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1008), )] (%9215:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1008)], %9182:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=983)]) -> (%9216:tensor<[1, 32, 
2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1008)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1008), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1009), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1010))] (%9216:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1008)]) -> (%9217:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1009)]) + graph.CallGraphOp @model.layers.27.mlp (%9217:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1009)]) -> (%9222:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1017)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1017), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1008), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1017), )] (%9222:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1017)], %9216:tensor<[1, 32, 2048], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1008)]) -> (%9223:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1017)]) + cf.ReturnOp (%9223:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1017)], %9196:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=996)], %9198:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=998)]) -> () } } graph.SubGraphOp @model.layers.27.self_attn [using_qnn:true, symbol:model.layers.27.self_attn] { - (%1488:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=983)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %374:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=30)], %375:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=58)]) -> (%1520:tensor<[1, 32, 2048], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1007)], %1501:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=995)], %1503:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=997)]) { - linalg.CPU.LinearOp (%1488:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=983)]) -> (%1489:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=989)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=983), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=986), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=985))] (%1488:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=983)]) -> (%1490:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=986)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=983), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=988), weight_weight:QuantSpec(LPBQ(quant_min: -8, 
quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=987))] (%1488:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=983)]) -> (%1491:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=988)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=989), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=989), )] (%1489:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=989)]) -> (%1489:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=989)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=989), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=989), )] (%1489:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=989)]) -> (%1492:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=989)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=986), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=986), )] (%1490:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=986)]) -> (%1490:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=986)]) - linalg.CPU.TransposeOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=986), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=986), )] (%1490:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=986)]) -> (%1493:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=986)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=988), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=988), )] (%1491:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=988)]) -> (%1491:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=988)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=988), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=988), )] (%1491:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=988)]) -> (%1494:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=988)]) - 
linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=989), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=990), )] (%1492:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=989)]) -> (%1495:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=990)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=986), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=992), )] (%1493:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=986)]) -> (%1496:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=992)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=990), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=990), )] (%1495:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=990)], %379:tensor<[1, 32, 128], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%1497:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=990)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=992), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=992), )] (%1496:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=992)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%1498:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=992)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=992), outputs_0:QuantSpec(Raw(type: Float16), uuid=994), )] 
(%1498:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=992)]) -> (%1499:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=994)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=994), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=995), )] (%1499:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=994)]) -> (%1500:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=995)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=995), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=995), )] (%1500:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=995)]) -> (%1501:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=995)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=988), outputs_0:QuantSpec(Raw(type: Float16), uuid=996), )] (%1494:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=988)]) -> (%1502:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=996)]) - linalg.CPU.CastTypeOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=996), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=997), )] (%1502:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=996)]) -> (%1503:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=997)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=30), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=995), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=30), )] (%374:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=30)], %1501:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=995)]) -> (%1504:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=30)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=58), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=997), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=58), )] (%375:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), 
uuid=58)], %1503:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=997)]) -> (%1505:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=58)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=30), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=30), )] (%1504:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=30)]) -> (%1506:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=30)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=58), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=58), )] (%1505:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=58)]) -> (%1507:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=58)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=990), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=30), outputs_0:QuantSpec(SymPerTensor(quant_min: 
-32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=998), )] (%1497:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=990)], %1506:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=30)]) -> (%1508:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=998)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=998), inputs_1:QuantSpec(Raw(type: Float32), uuid=999), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=998), )] (%1508:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=998)], %1509:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=999), constant:[0.088388346]]) -> (%1510:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=998)]) - linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=998), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1000), )] (%1510:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=998)]) -> (%1511:tensor<[1, 16, 32, 1], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1000)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1000), inputs_1:QuantSpec(Raw(type: Int16), uuid=1001), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1000), )] (%1511:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1000)], %1512:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=1001), constant:[-20]]) -> (%1513:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1000)]) - linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=1002), outputs_0:QuantSpec(Raw(type: UInt8), uuid=1003), )] (%319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %1514:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=1002), constant:[0.890625]]) -> (%1515:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=1003)]) - linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=1003), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=998), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1000), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1000), )] (%1515:tensor<[1, 1, 32, 1024], UInt8, 
CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=1003)], %1510:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=998)], %1513:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1000)]) -> (%1516:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1000)]) - linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1000), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1004), )] (%1516:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1000)]) -> (%1517:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1004)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1004), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=58), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1005), )] (%1517:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1004)], %1507:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, 
quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=58)]) -> (%1518:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1005)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1005), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1005), )] (%1518:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1005)]) -> (%1519:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1005)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1005), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1005), )] (%1519:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1005)]) -> (%1519:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1005)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1005), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1007), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, 
scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1006))] (%1519:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1005)]) -> (%1520:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1007)]) - cf.ReturnOp (%1520:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1007)], %1501:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=995)], %1503:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=997)]) -> () + (%9183:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=984)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8069:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=30)], %8070:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=58)]) -> (%9215:tensor<[1, 32, 
2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1008)], %9196:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=996)], %9198:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=998)]) { + linalg.CPU.LinearOp (%9183:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=984)]) -> (%9184:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=990)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=984), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=987), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=986))] (%9183:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=984)]) -> (%9185:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=987)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=984), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=989), weight_weight:QuantSpec(LPBQ(quant_min: 
-8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=988))] (%9183:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=984)]) -> (%9186:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=989)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=990), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=990), )] (%9184:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=990)]) -> (%9184:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=990)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=990), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=990), )] (%9184:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=990)]) -> (%9187:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=990)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=987), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=987), )] (%9185:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=987)]) -> (%9185:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=987)]) + linalg.CPU.TransposeOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=987), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=987), )] (%9185:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=987)]) -> (%9188:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=987)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=989), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=989), )] (%9186:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=989)]) -> (%9186:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=989)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=989), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=989), )] (%9186:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=989)]) -> (%9189:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=989)]) + 
linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=990), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=991), weight_weight:QuantSpec(Raw(type: Int16PerTensor), uuid=992))] (%9187:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=990)]) -> (%9190:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=991)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=987), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=993), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=994))] (%9188:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=987)]) -> (%9191:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=993)]) + linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=991), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=991), )] (%9190:tensor<[1, 16, 32, 128], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=991)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%9192:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=991)]) + linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=993), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=993), )] (%9191:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=993)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%9193:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=993)]) + linalg.CPU.CastTypeOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=993), outputs_0:QuantSpec(Raw(type: Float16), uuid=995), )] (%9193:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=993)]) -> (%9194:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=995)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=995), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=996), )] (%9194:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=995)]) -> (%9195:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=996)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=996), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=996), )] (%9195:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=996)]) -> (%9196:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=996)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=989), outputs_0:QuantSpec(Raw(type: Float16), uuid=997), )] (%9189:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 
32767, quant_to_type: Int16, scale_type: Float32), uuid=989)]) -> (%9197:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=997)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=997), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=998), )] (%9197:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=997)]) -> (%9198:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=998)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=30), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=996), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=30), )] (%8069:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=30)], %9196:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=996)]) -> (%9199:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=30)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=58), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=998), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: 
Float32), uuid=58), )] (%8070:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=58)], %9198:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=998)]) -> (%9200:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=58)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=30), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=30), )] (%9199:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=30)]) -> (%9201:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=30)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=58), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=58), )] (%9200:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=58)]) -> (%9202:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=58)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, 
scale_type: Float32), uuid=991), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=30), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=999), )] (%9192:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=991)], %9201:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=30)]) -> (%9203:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=999)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=999), inputs_1:QuantSpec(Raw(type: Float32), uuid=1000), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=999), )] (%9203:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=999)], %9204:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=1000), constant:[0.088388346]]) -> (%9205:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=999)]) + linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=999), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1001), )] (%9205:tensor<[1, 16, 32, 1024], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=999)]) -> (%9206:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1001)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1001), inputs_1:QuantSpec(Raw(type: Int16), uuid=1002), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1001), )] (%9206:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1001)], %9207:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=1002), constant:[-20]]) -> (%9208:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1001)]) + linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=1003), outputs_0:QuantSpec(Raw(type: UInt8), uuid=1004), )] (%8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %9209:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=1003), constant:[0.890625]]) -> (%9210:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=1004)]) + linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=1004), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=999), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=1001), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1001), )] (%9210:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=1004)], %9205:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=999)], %9208:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1001)]) -> (%9211:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1001)]) + linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1001), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1005), )] (%9211:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1001)]) -> (%9212:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1005)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1005), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=58), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1006), )] (%9212:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1005)], %9202:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=58)]) -> (%9213:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1006)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1006), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1006), )] (%9213:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1006)]) -> (%9214:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1006)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1006), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1006), )] (%9214:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1006)]) -> (%9214:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1006)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1006), 
outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1008), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1007))] (%9214:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1006)]) -> (%9215:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1008)]) + cf.ReturnOp (%9215:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1008)], %9196:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=996)], %9198:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=998)]) -> () } } graph.SubGraphOp @model.layers.27.mlp [using_qnn:true, symbol:model.layers.27.mlp] { - (%1522:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1008)]) -> (%1527:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1016)]) { - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1008), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1011), 
weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1010))] (%1522:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1008)]) -> (%1523:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1011)]) - linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1011), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1012), )] (%1523:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1011)]) -> (%1524:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1012)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1008), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1014), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1013))] (%1522:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1008)]) -> (%1525:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: 
Int16, scale_type: Float32), uuid=1014)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1012), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1014), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1012), )] (%1524:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1012)], %1525:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1014)]) -> (%1526:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1012)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1012), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1016), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1015))] (%1526:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1012)]) -> (%1527:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1016)]) - cf.ReturnOp (%1527:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=1016)]) -> () + (%9217:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1009)]) -> (%9222:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1017)]) { + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1009), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1012), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1011))] (%9217:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1009)]) -> (%9218:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1012)]) + linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1012), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1013), )] (%9218:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1012)]) -> (%9219:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1013)]) + linalg.CPU.LinearOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1009), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1015), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1014))] (%9217:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1009)]) -> (%9220:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1015)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1013), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1015), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1013), )] (%9219:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1013)], %9220:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1015)]) -> (%9221:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1013)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1013), 
outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1017), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1016))] (%9221:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1013)]) -> (%9222:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1017)]) + cf.ReturnOp (%9222:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1017)]) -> () } } // ╔═════╗ diff --git a/mllm/backends/cpu/CPUBackend.cpp b/mllm/backends/cpu/CPUBackend.cpp index a63fcd366..f8a3d8d1c 100644 --- a/mllm/backends/cpu/CPUBackend.cpp +++ b/mllm/backends/cpu/CPUBackend.cpp @@ -50,6 +50,7 @@ #include "mllm/backends/cpu/ops/STFTOp.hpp" #include "mllm/backends/cpu/ops/Scatter2ShardsOp.hpp" #include "mllm/backends/cpu/ops/SiLUOp.hpp" +#include "mllm/backends/cpu/ops/SigmoidOp.hpp" #include "mllm/backends/cpu/ops/SliceOp.hpp" #include "mllm/backends/cpu/ops/SoftmaxOp.hpp" #include "mllm/backends/cpu/ops/SplitOp.hpp" @@ -73,11 +74,11 @@ CPUBackend::CPUBackend() : Backend(kCPU, createCPUAllocator()) { CPUReduceSumOpFactory, CPUTransposeOpFactory, CPUPermuteOpFactory, CPUCastTypeOpFactory, CPUConcatOpFactory, CPUStackOpFactory, CPUContiguousOpFactory, CPUCopyOpFactory, CPUEmbeddingOpFactory, CPUSplitOpFactory, CPUViewOpFactory, CPULayerNormOpFactory, CPURepeatOpFactory, CPUX2XOpFactory, CPUSoftmaxOpFactory, - CPUSiLUOpFactory, CPURMSNormOpFactory, CPUGELUOpFactory, CPUQuickGELUOpFactory, CPUReLUOpFactory, - CPUMatMulOpFactory, CPUFlashAttention2OpFactory, CPUSliceOpFactory, 
CPUVisionRoPEOpFactory, CPUParamOpFactory, - CPUMultimodalRoPEOpFactory, CPURoPEOpFactory, CPUCausalMaskOpFactory, CPUConv1DOpFactory, CPUConv3DOpFactory, - CPUSTFTOpFactory, CPUISTFTOpFactory, CPUIndexOpFactory, CPUTopKOpFactory, CPUClipOpFactory, CPUMeanOpFactory, - CPUKVCacheOpFactory, CPUPagedAttnOpFactory, CPUScatter2ShardsOpFactory, CPURadixAttnOpFactory, + CPUSiLUOpFactory, CPUSigmoidOpFactory, CPURMSNormOpFactory, CPUGELUOpFactory, CPUQuickGELUOpFactory, + CPUReLUOpFactory, CPUMatMulOpFactory, CPUFlashAttention2OpFactory, CPUSliceOpFactory, CPUVisionRoPEOpFactory, + CPUParamOpFactory, CPUMultimodalRoPEOpFactory, CPURoPEOpFactory, CPUCausalMaskOpFactory, CPUConv1DOpFactory, + CPUConv3DOpFactory, CPUSTFTOpFactory, CPUISTFTOpFactory, CPUIndexOpFactory, CPUTopKOpFactory, CPUClipOpFactory, + CPUMeanOpFactory, CPUKVCacheOpFactory, CPUPagedAttnOpFactory, CPUScatter2ShardsOpFactory, CPURadixAttnOpFactory, CPUConv2DOpFactory, CPULayerNorm2DOpFactory, CPUInterpolateOpFactory, CPUPadOpFactory, CPUMaskedScatterOpFactory, CPUArgsortOpFactory, CPUCloneOpFactory, CPUAvgPool1dOpFactory, CPUFlashAttention2SwaSinkOpFactory, CPURadixAttnRelaxOpFactory, CPURadixAttnSwaSinkOpFactory, CPUEqualOpFactory, CPUWhereOpFactory>(); diff --git a/mllm/backends/cpu/kernels/Kernels.hpp b/mllm/backends/cpu/kernels/Kernels.hpp index 3d3ee9c8e..e8c05dfac 100644 --- a/mllm/backends/cpu/kernels/Kernels.hpp +++ b/mllm/backends/cpu/kernels/Kernels.hpp @@ -10,6 +10,7 @@ #if defined(MLLM_HOST_ARCH_X86_64) || defined(MLLM_HOST_ARCH_X86) #include "mllm/backends/cpu/kernels/x86/fill.hpp" // IWYU pragma: export #include "mllm/backends/cpu/kernels/x86/silu.hpp" // IWYU pragma: export +#include "mllm/backends/cpu/kernels/x86/sigmoid.hpp" // IWYU pragma: export #include "mllm/backends/cpu/kernels/x86/softmax.hpp" // IWYU pragma: export #include "mllm/backends/cpu/kernels/x86/rmsnorm.hpp" // IWYU pragma: export #include "mllm/backends/cpu/kernels/x86/gelu.hpp" // IWYU pragma: export @@ -22,6 +23,7 @@ 
#include "mllm/backends/cpu/kernels/arm/transpose.hpp" // IWYU pragma: export #include "mllm/backends/cpu/kernels/arm/permute.hpp" // IWYU pragma: export #include "mllm/backends/cpu/kernels/arm/silu.hpp" // IWYU pragma: export +#include "mllm/backends/cpu/kernels/arm/sigmoid.hpp" // IWYU pragma: export #include "mllm/backends/cpu/kernels/arm/cast_types.hpp" // IWYU pragma: export #include "mllm/backends/cpu/kernels/arm/layernorm.hpp" // IWYU pragma: export #include "mllm/backends/cpu/kernels/arm/softmax.hpp" // IWYU pragma: export diff --git a/mllm/backends/cpu/kernels/arm/sigmoid.cpp b/mllm/backends/cpu/kernels/arm/sigmoid.cpp new file mode 100644 index 000000000..8f18f5df8 --- /dev/null +++ b/mllm/backends/cpu/kernels/arm/sigmoid.cpp @@ -0,0 +1,131 @@ +// Copyright (c) MLLM Team. +// Licensed under the MIT License. + +#include "mllm/backends/cpu/kernels/arm/sigmoid.hpp" +#include "mllm/core/Parallel.hpp" + +#if defined(MLLM_HOST_ARCH_ARM64) || defined(MLLM_HOST_ARCH_ARM) + +#include "mllm/backends/cpu/kernels/arm/math.hpp" + +namespace mllm::cpu::arm { + +void sigmoid_fp32(const mllm_fp32_t* __restrict X, mllm_fp32_t* __restrict Y, int len, int thread_count) { + if (thread_count > 1) { + int tails = len % 16; + int _16_loops = len < 16 ? 
0 : len - tails; + MLLM_AUTO_PARALLEL_FOR_BEGIN_NT(i, 0, _16_loops, 16, thread_count) { + float32x4_t x_line_0 = vld1q_f32(X + i); + float32x4_t ans_line_0 = vsigmoid_f32(x_line_0); + vst1q_f32(Y + i, ans_line_0); + + float32x4_t x_line_1 = vld1q_f32(X + i + 4); + float32x4_t ans_line_1 = vsigmoid_f32(x_line_1); + vst1q_f32(Y + i + 4, ans_line_1); + + float32x4_t x_line_2 = vld1q_f32(X + i + 8); + float32x4_t ans_line_2 = vsigmoid_f32(x_line_2); + vst1q_f32(Y + i + 8, ans_line_2); + + float32x4_t x_line_3 = vld1q_f32(X + i + 12); + float32x4_t ans_line_3 = vsigmoid_f32(x_line_3); + vst1q_f32(Y + i + 12, ans_line_3); + } + MLLM_AUTO_PARALLEL_FOR_END_NT() + int i = _16_loops; + for (; i <= len - 8; i += 8) { + float32x4_t x_line_0 = vld1q_f32(X + i); + float32x4_t ans_line_0 = vsigmoid_f32(x_line_0); + vst1q_f32(Y + i, ans_line_0); + + float32x4_t x_line_1 = vld1q_f32(X + i + 4); + float32x4_t ans_line_1 = vsigmoid_f32(x_line_1); + vst1q_f32(Y + i + 4, ans_line_1); + } + for (; i <= len - 4; i += 4) { + float32x4_t x_line_0 = vld1q_f32(X + i); + float32x4_t ans_line_0 = vsigmoid_f32(x_line_0); + vst1q_f32(Y + i, ans_line_0); + } + for (; i < len; i++) { Y[i] = 1.0f / (1.0f + std::exp(-X[i])); } + } else { + int i; + for (i = 0; i <= len - 16; i += 16) { + float32x4_t x_line_0 = vld1q_f32(X + i); + float32x4_t ans_line_0 = vsigmoid_f32(x_line_0); + vst1q_f32(Y + i, ans_line_0); + + float32x4_t x_line_1 = vld1q_f32(X + i + 4); + float32x4_t ans_line_1 = vsigmoid_f32(x_line_1); + vst1q_f32(Y + i + 4, ans_line_1); + + float32x4_t x_line_2 = vld1q_f32(X + i + 8); + float32x4_t ans_line_2 = vsigmoid_f32(x_line_2); + vst1q_f32(Y + i + 8, ans_line_2); + + float32x4_t x_line_3 = vld1q_f32(X + i + 12); + float32x4_t ans_line_3 = vsigmoid_f32(x_line_3); + vst1q_f32(Y + i + 12, ans_line_3); + } + for (; i <= len - 8; i += 8) { + float32x4_t x_line_0 = vld1q_f32(X + i); + float32x4_t ans_line_0 = vsigmoid_f32(x_line_0); + vst1q_f32(Y + i, ans_line_0); + + float32x4_t x_line_1 = 
vld1q_f32(X + i + 4); + float32x4_t ans_line_1 = vsigmoid_f32(x_line_1); + vst1q_f32(Y + i + 4, ans_line_1); + } + for (; i <= len - 4; i += 4) { + float32x4_t x_line_0 = vld1q_f32(X + i); + float32x4_t ans_line_0 = vsigmoid_f32(x_line_0); + vst1q_f32(Y + i, ans_line_0); + } + for (; i < len; i++) { Y[i] = 1.0f / (1.0f + std::exp(-X[i])); } + } +} + +void sigmoid_fp16(const mllm_fp16_t* __restrict X, mllm_fp16_t* __restrict Y, int len, int thread_count) { + if (thread_count > 1) { + int tails = len % 16; + int _16_loops = len < 16 ? 0 : len - tails; + MLLM_AUTO_PARALLEL_FOR_BEGIN_NT(i, 0, _16_loops, 16, thread_count) { + float16x8_t x_line_0 = vld1q_f16(X + i); + float16x8_t ans_line_0 = vsigmoid_f16(x_line_0); + vst1q_f16(Y + i, ans_line_0); + + float16x8_t x_line_1 = vld1q_f16(X + i + 8); + float16x8_t ans_line_1 = vsigmoid_f16(x_line_1); + vst1q_f16(Y + i + 8, ans_line_1); + } + MLLM_AUTO_PARALLEL_FOR_END_NT() + int i = _16_loops; + for (; i <= len - 8; i += 8) { + float16x8_t x_line_0 = vld1q_f16(X + i); + float16x8_t ans_line_0 = vsigmoid_f16(x_line_0); + vst1q_f16(Y + i, ans_line_0); + } + for (; i < len; i++) { Y[i] = 1.0f / (1.0f + std::exp(-static_cast(X[i]))); } + } else { + int i; + for (i = 0; i <= len - 16; i += 16) { + float16x8_t x_line_0 = vld1q_f16(X + i); + float16x8_t ans_line_0 = vsigmoid_f16(x_line_0); + vst1q_f16(Y + i, ans_line_0); + + float16x8_t x_line_1 = vld1q_f16(X + i + 8); + float16x8_t ans_line_1 = vsigmoid_f16(x_line_1); + vst1q_f16(Y + i + 8, ans_line_1); + } + for (; i <= len - 8; i += 8) { + float16x8_t x_line_0 = vld1q_f16(X + i); + float16x8_t ans_line_0 = vsigmoid_f16(x_line_0); + vst1q_f16(Y + i, ans_line_0); + } + for (; i < len; i++) { Y[i] = 1.0f / (1.0f + std::exp(-static_cast(X[i]))); } + } +} + +} // namespace mllm::cpu::arm + +#endif diff --git a/mllm/backends/cpu/kernels/arm/sigmoid.hpp b/mllm/backends/cpu/kernels/arm/sigmoid.hpp new file mode 100644 index 000000000..70ca78d4b --- /dev/null +++ 
b/mllm/backends/cpu/kernels/arm/sigmoid.hpp @@ -0,0 +1,18 @@ +// Copyright (c) MLLM Team. +// Licensed under the MIT License. +#pragma once + +#include "mllm/core/DataTypes.hpp" +#include "mllm/utils/CPUArchHelper.hpp" + +#if defined(MLLM_HOST_ARCH_ARM64) || defined(MLLM_HOST_ARCH_ARM) + +namespace mllm::cpu::arm { + +void sigmoid_fp32(const mllm_fp32_t* __restrict X, mllm_fp32_t* __restrict Y, int len, int thread_count); + +void sigmoid_fp16(const mllm_fp16_t* __restrict X, mllm_fp16_t* __restrict Y, int len, int thread_count); + +} // namespace mllm::cpu::arm + +#endif diff --git a/mllm/backends/cpu/kernels/x86/sigmoid.cpp b/mllm/backends/cpu/kernels/x86/sigmoid.cpp new file mode 100644 index 000000000..53a5fe84d --- /dev/null +++ b/mllm/backends/cpu/kernels/x86/sigmoid.cpp @@ -0,0 +1,47 @@ +// Copyright (c) MLLM Team. +// Licensed under the MIT License. + +#include "mllm/backends/cpu/kernels/x86/sigmoid.hpp" +#include "mllm/core/Parallel.hpp" + +#if defined(MLLM_HOST_ARCH_X86) || defined(MLLM_HOST_ARCH_X86_64) + +#include "mllm/backends/cpu/kernels/common/sigmoid-inl.hpp" +#include + +namespace mllm::cpu::x86 { + +namespace hn = hwy::HWY_NAMESPACE; + +void sigmoid_fp32(const mllm_fp32_t* __restrict X, mllm_fp32_t* __restrict Y, int len, int thread_count) { + using D = hn::ScalableTag; + const D d; + const auto vector_size = hn::Lanes(d); + const int aligned_len = len - (len % vector_size); + + if (thread_count > 1) { + MLLM_AUTO_PARALLEL_FOR_BEGIN_NT(i, 0, aligned_len, vector_size, thread_count) { + auto x = hn::LoadU(d, X + i); + auto result = mllm::cpu::common::HWY_NAMESPACE::__sigmoid_fp32_vector(d, x); + hn::StoreU(result, d, Y + i); + } + MLLM_AUTO_PARALLEL_FOR_END_NT() + + // Handle remaining elements + for (int i = aligned_len; i < len; ++i) { Y[i] = 1.0f / (1.0f + std::exp(-X[i])); } + } else { + int i = 0; + for (; i + vector_size <= len; i += vector_size) { + auto x = hn::LoadU(d, X + i); + auto result = 
mllm::cpu::common::HWY_NAMESPACE::__sigmoid_fp32_vector(d, x); + hn::StoreU(result, d, Y + i); + } + + // Handle remaining elements + for (; i < len; ++i) { Y[i] = 1.0f / (1.0f + std::exp(-X[i])); } + } +} + +} // namespace mllm::cpu::x86 + +#endif diff --git a/mllm/backends/cpu/kernels/x86/sigmoid.hpp b/mllm/backends/cpu/kernels/x86/sigmoid.hpp new file mode 100644 index 000000000..96b719e8e --- /dev/null +++ b/mllm/backends/cpu/kernels/x86/sigmoid.hpp @@ -0,0 +1,16 @@ +// Copyright (c) MLLM Team. +// Licensed under the MIT License. +#pragma once + +#include "mllm/core/DataTypes.hpp" +#include "mllm/utils/CPUArchHelper.hpp" + +#if defined(MLLM_HOST_ARCH_X86_64) || defined(MLLM_HOST_ARCH_X86) + +namespace mllm::cpu::x86 { + +void sigmoid_fp32(const mllm_fp32_t* __restrict X, mllm_fp32_t* __restrict Y, int len, int thread_count); + +} // namespace mllm::cpu::x86 + +#endif diff --git a/mllm/backends/cpu/ops/LinearOp.cpp b/mllm/backends/cpu/ops/LinearOp.cpp index 62f2392ca..f3c7bfa64 100644 --- a/mllm/backends/cpu/ops/LinearOp.cpp +++ b/mllm/backends/cpu/ops/LinearOp.cpp @@ -360,7 +360,7 @@ void CPULinearOp::reshape(const std::vector& inputs, std::vector } case aops::LinearImplTypes::kQNN_LPBQ_w4a16o16_G32: case aops::LinearImplTypes::kQNN_LPBQ_w4a16o16_G64: { - o_dtype = kInt16PerTensorSym; + o_dtype = kUInt16PerTensorAsy; break; } default: o_dtype = i.dtype(); diff --git a/mllm/backends/cpu/ops/SigmoidOp.cpp b/mllm/backends/cpu/ops/SigmoidOp.cpp new file mode 100644 index 000000000..9bd8880f7 --- /dev/null +++ b/mllm/backends/cpu/ops/SigmoidOp.cpp @@ -0,0 +1,41 @@ +// Copyright (c) MLLM Team. +// Licensed under the MIT License. 
+ +#include +#include "mllm/backends/cpu/ops/SigmoidOp.hpp" +#include "mllm/backends/cpu/kernels/Kernels.hpp" + +namespace mllm::cpu { + +CPUSigmoidOp::CPUSigmoidOp(const aops::SigmoidOpOptions& options) : aops::SigmoidOp(options) {} + +void CPUSigmoidOp::forward(const std::vector& inputs, std::vector& outputs) { + const auto& X = inputs[0]; + auto& Y = outputs[0]; + + switch (X.dtype()) { + case kFloat32: { +#if defined(MLLM_HOST_ARCH_X86_64) || defined(MLLM_HOST_ARCH_X86) + x86::sigmoid_fp32(X.ptr(), Y.ptr(), X.numel(), options_.getThreads()); +#elif defined(MLLM_HOST_ARCH_ARM64) || defined(MLLM_HOST_ARCH_ARM) + arm::sigmoid_fp32(X.ptr(), Y.ptr(), X.numel(), options_.getThreads()); +#else + NYI("Sigmoid not supported for Other Architectures"); +#endif + break; + } + case kFloat16: { +#if defined(MLLM_HOST_ARCH_X86_64) || defined(MLLM_HOST_ARCH_X86) + NYI("Sigmoid FP16 not implemented yet for X86"); +#elif defined(MLLM_HOST_ARCH_ARM64) || defined(MLLM_HOST_ARCH_ARM) + arm::sigmoid_fp16(X.ptr(), Y.ptr(), X.numel(), options_.getThreads()); +#else + NYI("Sigmoid not supported for Other Architectures"); +#endif + break; + } + default: NYI("CPUSigmoidOp::forward not support dtype {}", nameOfType(X.dtype())); break; + } +} + +} // namespace mllm::cpu diff --git a/mllm/backends/cpu/ops/SigmoidOp.hpp b/mllm/backends/cpu/ops/SigmoidOp.hpp new file mode 100644 index 000000000..507cc44e5 --- /dev/null +++ b/mllm/backends/cpu/ops/SigmoidOp.hpp @@ -0,0 +1,25 @@ +// Copyright (c) MLLM Team. +// Licensed under the MIT License. 
+ +#pragma once + +#include "mllm/core/BaseOp.hpp" +#include "mllm/core/aops/SigmoidOp.hpp" + +namespace mllm::cpu { + +class CPUSigmoidOp final : public aops::SigmoidOp { + public: + explicit CPUSigmoidOp(const aops::SigmoidOpOptions& options); + + void forward(const std::vector& inputs, std::vector& outputs) override; +}; + +class CPUSigmoidOpFactory : public TypedOpFactory { + public: + std::shared_ptr createOpImpl(const aops::SigmoidOpOptions& options) override { + return std::make_shared(options); + } +}; + +} // namespace mllm::cpu diff --git a/mllm/backends/qnn/aot/passes/AOTCompileContext.cpp b/mllm/backends/qnn/aot/passes/AOTCompileContext.cpp index 43d8801d6..05fe55560 100644 --- a/mllm/backends/qnn/aot/passes/AOTCompileContext.cpp +++ b/mllm/backends/qnn/aot/passes/AOTCompileContext.cpp @@ -21,4 +21,8 @@ void AOTCompileContext::setConfig(const std::string& fp) { nlohmann::json& AOTCompileContext::getConfig() { return config_; } +void AOTCompileContext::setParamFile(const ParameterFile::ptr_t& params) { params_ = params; } + +ParameterFile::ptr_t AOTCompileContext::getParamFile() { return params_; } + } // namespace mllm::qnn::aot diff --git a/mllm/backends/qnn/aot/passes/AOTCompileContext.hpp b/mllm/backends/qnn/aot/passes/AOTCompileContext.hpp index a9def31ef..9fdcff1c4 100644 --- a/mllm/backends/qnn/aot/passes/AOTCompileContext.hpp +++ b/mllm/backends/qnn/aot/passes/AOTCompileContext.hpp @@ -5,6 +5,7 @@ #include #include "mllm/backends/qnn/aot/QnnWrappersAPI.hpp" +#include "mllm/core/ParameterFile.hpp" namespace mllm::qnn::aot { @@ -29,12 +30,17 @@ class AOTCompileContext { nlohmann::json& getConfig(); + void setParamFile(const ParameterFile::ptr_t& params); + + ParameterFile::ptr_t getParamFile(); + private: // Private constructor AOTCompileContext() = default; QnnAOTEnv* env_ = nullptr; nlohmann::json config_; + ParameterFile::ptr_t params_; }; } // namespace mllm::qnn::aot diff --git a/mllm/backends/qnn/aot/passes/AOTPipeline.cpp 
b/mllm/backends/qnn/aot/passes/AOTPipeline.cpp index b1caa2d13..c60c6aa78 100644 --- a/mllm/backends/qnn/aot/passes/AOTPipeline.cpp +++ b/mllm/backends/qnn/aot/passes/AOTPipeline.cpp @@ -6,6 +6,7 @@ #include "mllm/backends/qnn/aot/passes/MarkTensorIO.hpp" #include "mllm/backends/qnn/aot/passes/MergeLLMHeadIntoMainGraphPass.hpp" #include "mllm/backends/qnn/aot/passes/OpNamingPass.hpp" +#include "mllm/backends/qnn/aot/passes/PTQPass.hpp" #include "mllm/backends/qnn/aot/passes/SplitLLMGraphPass.hpp" namespace mllm::qnn::aot { @@ -22,9 +23,10 @@ std::vector> createQnnAOTLoweringPipeline(QnnAOTEnv* e ret.emplace_back(createOpNamingPass()); ret.emplace_back(createMergeLLMHeadIntoMainGraphPass()); ret.emplace_back(createLLMQuantRecipePass()); - ret.emplace_back(createSplitLLMGraphPass()); - ret.emplace_back(createMarkTensorIOPass()); - ret.emplace_back(createLLM2QnnLoweringPass()); + ret.emplace_back(createPTQPass()); + // ret.emplace_back(createSplitLLMGraphPass()); + // ret.emplace_back(createMarkTensorIOPass()); + // ret.emplace_back(createLLM2QnnLoweringPass()); } else { MLLM_WARN("This pass currently only supports LLM applications. 
Please ensure your config contains 'quant_recipe.llm_recipe " "= true'."); diff --git a/mllm/backends/qnn/aot/passes/LLMQuantRecipePass.cpp b/mllm/backends/qnn/aot/passes/LLMQuantRecipePass.cpp index e6a16b824..adada76ed 100644 --- a/mllm/backends/qnn/aot/passes/LLMQuantRecipePass.cpp +++ b/mllm/backends/qnn/aot/passes/LLMQuantRecipePass.cpp @@ -38,6 +38,13 @@ void recursiveVisitGraph(const ir::IRContext::ptr_t& ctx, if (!some_op->getAttr("quant_recipe")) { for (auto& pattern : patterns_w_priority_) { if (pattern.second->isMatch(some_op)) { + for (auto& _named_pattern_ : _named_pattern) { + if (_named_pattern_.second == pattern.second) { + MLLM_INFO("LLMQuantizationRecipePass Processing op: {} with pass: {}", + some_op->cast_()->getAOp()->getName(), _named_pattern_.first); + } + } + if (!pattern.second->rewrite(iw, some_op)) { for (auto& _named_pattern_ : _named_pattern) { if (_named_pattern_.second == pattern.second) { @@ -87,6 +94,11 @@ ir::linalg::LinalgIRQuantizatonSpecAttr::ptr_t genSimpleQuantizationSpecAttr(con spec = ir::linalg::QuantizationSpecSymPerTensor::create(0, 65535, kUInt16, kFloat32, Tensor::nil()); break; } + case kUInt16PerTensorAsy: { + spec = + ir::linalg::QuantizationSpecAsymPerTensor::create(0, 65535, kUInt16, kFloat32, kInt32, Tensor::nil(), Tensor::nil()); + break; + } case kUInt8: case kUInt16: case kUInt32: @@ -241,6 +253,31 @@ ir::linalg::LinalgIRQuantizatonSpecAttr::ptr_t cloneQuantizationSpecType( return ctx->create(cloned_spec); } +//===----------------------------------------------------------------------===// +// Sigmoid Pattern +//===----------------------------------------------------------------------===// +bool LLMQuantRecipeSigmoidPattern::isMatch(const mllm::ir::op_ptr_t& op) { + if (op->isa_()) { return true; } + return false; +} + +bool LLMQuantRecipeSigmoidPattern::rewrite(ir::IRWriter& writer, const ir::op_ptr_t& node) { + return noSharingSingleInAndSingleOutQuantAnnoAttr(writer.getContext(), node->cast_()); +} + 
+//===----------------------------------------------------------------------===// +// Negative Pattern +//===----------------------------------------------------------------------===// +bool LLMQuantRecipeNegPattern::isMatch(const mllm::ir::op_ptr_t& op) { + if (op->isa_()) { return true; } + return false; +} + +bool LLMQuantRecipeNegPattern::rewrite(ir::IRWriter& writer, const ir::op_ptr_t& node) { + return shareQuantSpecSingleInputToSingleOutputAndSetOpQuantAnnoAttr(writer.getContext(), + node->cast_()); +} + //===----------------------------------------------------------------------===// // ReduceMin Pattern //===----------------------------------------------------------------------===// @@ -352,7 +389,12 @@ bool LLMQuantRecipeRMSNormPattern::rewrite(ir::IRWriter& writer, const ir::op_pt auto weight_spec_attr = cloneQuantizationSpecType( writer.getContext(), node->inputs().front()->getAttr("quant_recipe")->cast_()); - weight_reg_tensor_ir->outputs().front()->setAttr("qnn_recipe", weight_spec_attr); + weight_reg_tensor_ir->outputs().front()->setAttr("quant_recipe", weight_spec_attr); + + // Get self anno + node->getAttr("quant_recipe") + ->cast_() + ->annotation_.weights.insert({"weight", weight_spec_attr->spec_}); return true; } @@ -401,6 +443,19 @@ bool LLMQuantRecipeIndexPattern::rewrite(ir::IRWriter& writer, const ir::op_ptr_ return true; } +//===----------------------------------------------------------------------===// +// Slice Pattern +//===----------------------------------------------------------------------===// +bool LLMQuantRecipeSlicePattern::isMatch(const mllm::ir::op_ptr_t& op) { + if (op->isa_()) { return true; } + return false; +} + +bool LLMQuantRecipeSlicePattern::rewrite(ir::IRWriter& writer, const ir::op_ptr_t& node) { + return shareQuantSpecSingleInputToSingleOutputAndSetOpQuantAnnoAttr(writer.getContext(), + node->cast_()); +} + //===----------------------------------------------------------------------===// // Elementwise Pattern 
//===----------------------------------------------------------------------===// @@ -422,23 +477,9 @@ bool LLMQuantRecipeElementwisePattern::rewrite(ir::IRWriter& writer, const ir::o // i_1 maybe a constant, we need to create quant recipe for it if (!i_1->getAttr("quant_recipe")) { if (i_1->getAttr("constant")) { - auto i_1_tensor = i_1->cast_()->tensor_; - switch (i_1_tensor.dtype()) { - case kUInt16: - case kUInt8: - case kInt16: - case kInt8: - case kFloat32: - case kFloat16: - case kBFloat16: { - i_1->setAttr("quant_recipe", writer.create( - ir::linalg::QuantizationSpecRaw::create(i_1_tensor.dtype()))); - break; - } - default: { - NYI("Only support [int16, int8, bf16, f16, sf32] for now."); - } - } + i_1->setAttr("quant_recipe", + cloneQuantizationSpecType(writer.getContext(), + i_0->getAttr("quant_recipe")->cast_())); } else { MLLM_WARN("LLMQuantRecipeEqualPattern Only support constant Value as second inputs right now. Pls send us a issue or PR " @@ -447,8 +488,6 @@ bool LLMQuantRecipeElementwisePattern::rewrite(ir::IRWriter& writer, const ir::o } } - MLLM_RETURN_FALSE_IF_NOT(i_1->getAttr("quant_recipe")); - o_0->setAttr("quant_recipe", i_0->getAttr("quant_recipe")); auto annotation_attr = writer.create(); @@ -738,7 +777,8 @@ bool LLMQuantRecipeLinearPattern::rewrite(ir::IRWriter& writer, const ir::op_ptr ir::linalg::QuantizationSpecLPBQ::create(-8, 7, block_size, -1, 4, kUInt4, kFloat32, Tensor::nil(), Tensor::nil()); // output sym int16 - auto out_quant_spec = ir::linalg::QuantizationSpecSymPerTensor::create(-32768, 32767, kInt16, kFloat32, Tensor::nil()); + auto out_quant_spec = ir::linalg::QuantizationSpecAsymPerTensor::create(0, 65536, kUInt16, kFloat32, kInt32, + Tensor::nil(), Tensor::nil()); linear_ir->outputs().front()->setAttr("quant_recipe", writer.create(out_quant_spec)); @@ -857,14 +897,32 @@ bool LLMQuantRecipeEmbeddingPattern::rewrite(ir::IRWriter& writer, const ir::op_ o_quant_spec = ir::linalg::QuantizationSpecSymPerTensor::create(0, 65535, 
kUInt16, kFloat32, Tensor::nil()); break; } + case kUInt16PerTensorAsy: { + o_quant_spec = ir::linalg::QuantizationSpecAsymPerTensor::create(0, 65535, kUInt16, kFloat32, kInt32, Tensor::nil(), + Tensor::nil()); + break; + } default: { NYI("Only support [uint16, int16, uint8, int8], [sym] for now."); } } + // Weights + auto weight_name = embedding_op->getAOp()->getName() + ".weight"; + auto weight_reg_tensor_ir = writer.getContext()->lookupSymbolTable(weight_name); + MLLM_RETURN_FALSE_IF_NOT(weight_reg_tensor_ir); + MLLM_RETURN_FALSE_IF_NOT(weight_reg_tensor_ir->isa_()); + MLLM_RETURN_FALSE_IF_NOT(weight_reg_tensor_ir->outputs().front()->isa_()); + auto weight_tensor = weight_reg_tensor_ir->outputs().front()->cast_(); + annotation_attr->annotation_.outputs.emplace_back(o_quant_spec); quantize_op->outputs().front()->setAttr("quant_recipe", writer.create(o_quant_spec)); + + // Embedding weight quantization method same as outputs, but not share, just same type + auto weight_spec_attr = genSimpleQuantizationSpecAttr(writer.getContext(), weight_tensor); + weight_reg_tensor_ir->outputs().front()->setAttr("quant_recipe", weight_spec_attr); + annotation_attr->annotation_.weights.insert({"weight", weight_spec_attr->spec_}); } // Attach to quantize node @@ -941,6 +999,9 @@ bool LLMQuantRecipeQwen3AttentionPattern::rewrite(ir::IRWriter& writer, const ir LLMQuantRecipePass::LLMQuantRecipePass() { auto config = AOTCompileContext::getInstance().getConfig(); // Register all patterns + addPattern(LLMQuantRecipeNegPattern::create(), "neg", 0); + addPattern(LLMQuantRecipeSlicePattern::create(), "slice", 0); + addPattern(LLMQuantRecipeSigmoidPattern::create(), "sigmoid", 0); addPattern(LLMQuantRecipeReduceMinPattern::create(), "reduce_min", 0); addPattern(LLMQuantRecipeRoPEPattern::create(), "rope", 0); addPattern(LLMQuantRecipeCastTypePattern::create(), "cast_type", 0); @@ -958,9 +1019,6 @@ LLMQuantRecipePass::LLMQuantRecipePass() { addPattern(LLMQuantRecipeLinearPattern::create(), 
"linear", 0); addPattern(LLMQuantRecipeEmbeddingPattern::create(), "embedding", 0); addPattern(LLMQuantRecipeViewPattern::create(), "view", 0); - if (config["quant_recipe"]["builtin_llm_pass"]["model"] == "qwen3") { - addPattern(LLMQuantRecipeQwen3AttentionPattern::create(), "qwen3_attention", 100); - } } uint8_t LLMQuantRecipePass::run(const ir::node_ptr_t& op) { diff --git a/mllm/backends/qnn/aot/passes/LLMQuantRecipePass.hpp b/mllm/backends/qnn/aot/passes/LLMQuantRecipePass.hpp index dbb6d1dc1..abd7cdbcc 100644 --- a/mllm/backends/qnn/aot/passes/LLMQuantRecipePass.hpp +++ b/mllm/backends/qnn/aot/passes/LLMQuantRecipePass.hpp @@ -32,6 +32,32 @@ bool noSharingSingleInAndSingleOutQuantAnnoAttr(const ir::IRContext::ptr_t& ctx, ir::linalg::LinalgIRQuantizatonSpecAttr::ptr_t cloneQuantizationSpecType( const ir::IRContext::ptr_t& ctx, const ir::linalg::LinalgIRQuantizatonSpecAttr::ptr_t& from); +//===----------------------------------------------------------------------===// +// Sigmoid Pattern +//===----------------------------------------------------------------------===// +class LLMQuantRecipeSigmoidPattern : public ir::Pattern { + public: + bool isMatch(const mllm::ir::op_ptr_t& op) override; + + bool rewrite(ir::IRWriter& writer, const ir::op_ptr_t& node) override; + + static inline std::shared_ptr create() { + return std::make_shared(); + } +}; + +//===----------------------------------------------------------------------===// +// Negative Pattern +//===----------------------------------------------------------------------===// +class LLMQuantRecipeNegPattern : public ir::Pattern { + public: + bool isMatch(const mllm::ir::op_ptr_t& op) override; + + bool rewrite(ir::IRWriter& writer, const ir::op_ptr_t& node) override; + + static inline std::shared_ptr create() { return std::make_shared(); } +}; + //===----------------------------------------------------------------------===// // ReduceMin Pattern 
//===----------------------------------------------------------------------===// @@ -110,6 +136,18 @@ class LLMQuantRecipeIndexPattern : public ir::Pattern { static inline std::shared_ptr create() { return std::make_shared(); } }; +//===----------------------------------------------------------------------===// +// Slice Pattern +//===----------------------------------------------------------------------===// +class LLMQuantRecipeSlicePattern : public ir::Pattern { + public: + bool isMatch(const mllm::ir::op_ptr_t& op) override; + + bool rewrite(ir::IRWriter& writer, const ir::op_ptr_t& node) override; + + static inline std::shared_ptr create() { return std::make_shared(); } +}; + //===----------------------------------------------------------------------===// // Elementwise Pattern //===----------------------------------------------------------------------===// diff --git a/mllm/backends/qnn/aot/passes/PTQPass.cpp b/mllm/backends/qnn/aot/passes/PTQPass.cpp index e69de29bb..9d4cabee3 100644 --- a/mllm/backends/qnn/aot/passes/PTQPass.cpp +++ b/mllm/backends/qnn/aot/passes/PTQPass.cpp @@ -0,0 +1,29 @@ +// Copyright (c) MLLM Team. +// Licensed under the MIT License. 
+ +#include "mllm/backends/qnn/aot/passes/PTQPass.hpp" +#include "mllm/backends/qnn/aot/passes/AOTCompileContext.hpp" +#include "mllm/compile/ir/builtin/Op.hpp" +#include "mllm/compile/ir/graph/Op.hpp" +#include "mllm/compile/ir/linalg/Op.hpp" +#include "mllm/compile/ir/tensor/Value.hpp" +#include "mllm/compile/ir/cf/Op.hpp" +#include "mllm/compile/ir/Node.hpp" +#include "mllm/core/OpTypes.hpp" +#include "mllm/utils/Common.hpp" + +namespace mllm::qnn::aot { + +namespace { + +void solveStaticWeights() {} + +void solveStaticRoPE() {} + +} // namespace + +uint8_t PTQPass::run(const ir::node_ptr_t& op) { return ir::PASS_RET_SUCCESS; } + +ir::Pass::ptr_t createPTQPass() { return std::make_shared(); } + +} // namespace mllm::qnn::aot diff --git a/mllm/backends/qnn/aot/passes/PTQPass.hpp b/mllm/backends/qnn/aot/passes/PTQPass.hpp index e69de29bb..6d6d35305 100644 --- a/mllm/backends/qnn/aot/passes/PTQPass.hpp +++ b/mllm/backends/qnn/aot/passes/PTQPass.hpp @@ -0,0 +1,32 @@ +// Copyright (c) MLLM Team. +// Licensed under the MIT License. + +#pragma once + +#include "mllm/compile/passes/Pass.hpp" +#include "mllm/compile/ir/Node.hpp" + +namespace mllm::qnn::aot { + +//===----------------------------------------------------------------------===// +// PTQPass - Post-Training Quantization Pass +// This pass applies post-training quantization transformations to the IR. +// It walks through the computation graph and applies quantization +// based on configuration parameters. 
+//===----------------------------------------------------------------------===// +class PTQPass final : public ir::Pass { + public: + PTQPass() = default; + + ~PTQPass() override = default; + + // Run the PTQ pass on the given operation + // Expected input: ModuleOp containing the computation graph + // Output: Modified IR with PTQ transformations applied + uint8_t run(const ir::node_ptr_t& op) override; +}; + +// Factory function to create PTQPass instance +ir::Pass::ptr_t createPTQPass(); + +} // namespace mllm::qnn::aot diff --git a/mllm/compile/ir/GeneratedRTTIKind.hpp b/mllm/compile/ir/GeneratedRTTIKind.hpp index 0f48660a6..9c48d0535 100644 --- a/mllm/compile/ir/GeneratedRTTIKind.hpp +++ b/mllm/compile/ir/GeneratedRTTIKind.hpp @@ -1,4 +1,4 @@ -// Auto generated: 2025-12-29 05:14:54 +// Auto generated: 2026-01-04 13:13:09 // do not modify this file #pragma once @@ -85,6 +85,7 @@ enum NodeKind : uint32_t { RK_Op_LinalgIROp_RadixAttnSwaSinkOp, RK_Op_LinalgIROp_EqualOp, RK_Op_LinalgIROp_WhereOp, + RK_Op_LinalgIROp_SigmoidOp, RK_Op_LinalgIROp_CustomizedOp, RK_Op_LinalgIROp_Last, RK_Op_GraphIROp, diff --git a/mllm/compile/ir/NodeRTTIClassOfImpl.hpp b/mllm/compile/ir/NodeRTTIClassOfImpl.hpp index c7de7f72e..1e631c8de 100644 --- a/mllm/compile/ir/NodeRTTIClassOfImpl.hpp +++ b/mllm/compile/ir/NodeRTTIClassOfImpl.hpp @@ -1,4 +1,4 @@ -// Auto generated: 2025-12-29 05:14:54 +// Auto generated: 2026-01-04 13:13:09 // do not modify this file #pragma once namespace mllm::ir { @@ -226,6 +226,9 @@ struct NodeRTTIClassOfImpl { #define RTTI_RK_OP_LINALGIROP_WHEREOP_IMPL(v) \ return (v)->getKind() >= RK_Op_LinalgIROp_WhereOp && (v)->getKind() <= RK_Op_LinalgIROp_WhereOp +#define RTTI_RK_OP_LINALGIROP_SIGMOIDOP_IMPL(v) \ + return (v)->getKind() >= RK_Op_LinalgIROp_SigmoidOp && (v)->getKind() <= RK_Op_LinalgIROp_SigmoidOp + #define RTTI_RK_OP_LINALGIROP_CUSTOMIZEDOP_IMPL(v) \ return (v)->getKind() >= RK_Op_LinalgIROp_CustomizedOp && (v)->getKind() <= RK_Op_LinalgIROp_CustomizedOp 
diff --git a/mllm/compile/ir/linalg/Attribute.hpp b/mllm/compile/ir/linalg/Attribute.hpp index 34a116c5c..576530362 100644 --- a/mllm/compile/ir/linalg/Attribute.hpp +++ b/mllm/compile/ir/linalg/Attribute.hpp @@ -60,6 +60,7 @@ struct QuantizationSpec { using ptr_t = std::shared_ptr; QuantizationSpecType type; uint64_t uuid; + bool solved = false; }; struct QuantizationSpecRaw : public QuantizationSpec { diff --git a/mllm/compile/ir/linalg/Op.cpp b/mllm/compile/ir/linalg/Op.cpp index 320dcf5d3..c303e9d3e 100644 --- a/mllm/compile/ir/linalg/Op.cpp +++ b/mllm/compile/ir/linalg/Op.cpp @@ -116,6 +116,7 @@ LINALG_AOPS_DECL(OpTypes::kArgsort, ArgsortOp); LINALG_AOPS_DECL(OpTypes::kEqual, EqualOp); LINALG_AOPS_DECL(OpTypes::kWhere, WhereOp); +LINALG_AOPS_DECL(OpTypes::kSigmoid, SigmoidOp); // Customized Ops LINALG_AOPS_DECL(OpTypes::kFlashAttention2WithSinkAndSwa, FlashAttention2SwaSinkOp); diff --git a/mllm/compile/ir/linalg/Op.hpp b/mllm/compile/ir/linalg/Op.hpp index 7e93a288f..a737a623a 100644 --- a/mllm/compile/ir/linalg/Op.hpp +++ b/mllm/compile/ir/linalg/Op.hpp @@ -79,6 +79,7 @@ class RadixAttnRelaxOp; class RadixAttnSwaSinkOp; class EqualOp; class WhereOp; +class SigmoidOp; } // namespace mllm #define LINALG_AOPS_DEFINE(class_name, rtti_name) \ @@ -251,6 +252,7 @@ LINALG_AOPS_DEFINE(WhereOp, WHEREOP); LINALG_AOPS_DEFINE(FlashAttention2SwaSinkOp, FLASHATTENTION2SWASINKOP); LINALG_AOPS_DEFINE(RadixAttnRelaxOp, RADIXATTNRELAXOP); LINALG_AOPS_DEFINE(RadixAttnSwaSinkOp, RADIXATTNSWASINKOP); +LINALG_AOPS_DEFINE(SigmoidOp, SIGMOIDOP); /** * @brief CustomizedOp: A generic operation type for implementing backend-specific operations diff --git a/mllm/compile/ir/rtti_kind_gen.py b/mllm/compile/ir/rtti_kind_gen.py index cb2ad4d52..7615e323b 100644 --- a/mllm/compile/ir/rtti_kind_gen.py +++ b/mllm/compile/ir/rtti_kind_gen.py @@ -290,6 +290,7 @@ def define_lianlg_ir(ir: dict): op.derive(Cls("RadixAttnSwaSinkOp")) op.derive(Cls("EqualOp")) op.derive(Cls("WhereOp")) + 
op.derive(Cls("SigmoidOp")) # customized ops op.derive(Cls("CustomizedOp")) diff --git a/mllm/core/DataTypes.cpp b/mllm/core/DataTypes.cpp index 0cc549b71..b349eb292 100644 --- a/mllm/core/DataTypes.cpp +++ b/mllm/core/DataTypes.cpp @@ -48,8 +48,11 @@ size_t lanesOfType(DataTypes dtype) { CASE(kComplexFloat64) CASE(kInt16PerTensorSym) CASE(kInt8PerTensorSym) + CASE(kUInt8PerTensorSym) + CASE(kUInt16PerTensorAsy) + CASE(kUInt16PerTensorSym) case kByte: return MllmDataTypeInfo::lanes(); - default: NYI("Unknown data type"); + default: NYI("Unknown data type {}", (int32_t)dtype); } return 1; #undef CASE @@ -96,7 +99,10 @@ size_t bytesOfType(DataTypes dtype) { CASE(kComplexFloat32) CASE(kComplexFloat64) CASE(kInt16PerTensorSym) + CASE(kUInt16PerTensorSym) + CASE(kUInt16PerTensorAsy) CASE(kInt8PerTensorSym) + CASE(kUInt8PerTensorSym) CASE(kInt4) CASE(kUInt4) case kByte: return MllmDataTypeInfo::bytes(); @@ -147,7 +153,10 @@ std::string nameOfType(DataTypes dtype) { CASE(kComplexFloat32) CASE(kComplexFloat64) CASE(kInt16PerTensorSym) + CASE(kUInt16PerTensorSym) + CASE(kUInt16PerTensorAsy) CASE(kInt8PerTensorSym) + CASE(kUInt8PerTensorSym) CASE(kInt4) CASE(kUInt4) case kByte: return MllmDataTypeInfo::name(); diff --git a/mllm/core/OpTypes.hpp b/mllm/core/OpTypes.hpp index 2b9916a90..849df8941 100644 --- a/mllm/core/OpTypes.hpp +++ b/mllm/core/OpTypes.hpp @@ -95,6 +95,8 @@ enum class OpTypes : int32_t { kEqual = 73, kWhere = 74, + kSigmoid = 75, + // Dynamic Op Start for user to register there own ops. 
kDynamicOp_Start = 4096, diff --git a/mllm/core/Tensor.cpp b/mllm/core/Tensor.cpp index e151341d9..ee0d69752 100644 --- a/mllm/core/Tensor.cpp +++ b/mllm/core/Tensor.cpp @@ -32,12 +32,12 @@ namespace mllm { void Tensor::operator delete(void* ptr) noexcept { ((Tensor*)ptr)->impl_.reset(); - for (auto& [a, _] : ((Tensor*)ptr)->attached_views_) { ((Tensor*)ptr)->attached_views_[a].reset(); } + for (auto& [a, _] : ((Tensor*)ptr)->impl_->attachedViews()) { ((Tensor*)ptr)->impl_->attachedViews()[a].reset(); } } void Tensor::delete_() noexcept { this->impl_.reset(); - for (auto& [a, _] : this->attached_views_) { this->attached_views_[a].reset(); } + for (auto& [a, _] : this->impl_->attachedViews()) { this->impl_->attachedViews()[a].reset(); } } /** @@ -75,6 +75,21 @@ Tensor Tensor::empty(const std::vector& shape, DataTypes dtype, DeviceT return Tensor(impl); } +Tensor Tensor::constant(float x, DataTypes dtype, DeviceTypes device) { + auto rhs_tensor = Tensor::empty({1}, dtype, device).alloc(); + switch (dtype) { + case kFloat32: *(rhs_tensor.ptr()) = x; break; + case kFloat16: *(rhs_tensor.ptr()) = half_float::half(x); break; + case kInt32: *(rhs_tensor.ptr()) = x; break; + case kInt16: *(rhs_tensor.ptr()) = x; break; + case kInt8: *(rhs_tensor.ptr()) = x; break; + case kInt16PerTensorSym: *(rhs_tensor.ptr()) = x; break; + case kUInt16PerTensorAsy: *(rhs_tensor.ptr()) = x; break; + default: NYI("Type is not supported"); break; + } + return rhs_tensor; +} + Tensor Tensor::emptyLike(const Tensor& liked_tensor) { auto ret = Tensor::empty(liked_tensor.shape(), liked_tensor.dtype(), liked_tensor.device()); return ret; @@ -82,16 +97,16 @@ Tensor Tensor::emptyLike(const Tensor& liked_tensor) { Tensor& Tensor::allocExtraTensorView(const std::string& extra_tensor_name, const std::vector& shape, DataTypes dtype, DeviceTypes device) { - MLLM_RT_ASSERT_EQ(attached_views_.count(extra_tensor_name), 0); + MLLM_RT_ASSERT_EQ(impl_->attachedViews().count(extra_tensor_name), 0); auto 
storage = TensorStorage::create(shape, dtype, device); auto impl = TensorViewImpl::create(shape, storage); - attached_views_.insert({extra_tensor_name, impl}); + impl_->attachedViews().insert({extra_tensor_name, impl}); return *this; } Tensor Tensor::getExtraTensorViewInTensor(const std::string& extra_tensor_name) { - MLLM_RT_ASSERT_EQ(attached_views_.count(extra_tensor_name), 1); - return Tensor(attached_views_.at(extra_tensor_name)); + MLLM_RT_ASSERT_EQ(impl_->attachedViews().count(extra_tensor_name), 1); + return Tensor(impl_->attachedViews().at(extra_tensor_name)); } Tensor Tensor::zeros(const std::vector& shape, DataTypes dtype, DeviceTypes device) { @@ -275,6 +290,27 @@ Tensor Tensor::mul(float rhs, DataTypes data_type) { return Context::instance().buildOpAndSubmitTask(OpTypes::kMul, opts, {*this, rhs_tensor})[0]; } +Tensor Tensor::addConstant(Tensor rhs) { + auto opts = aops::AddOpOptions{}; + opts.setInputsConstant(0, 0); + opts.setInputsConstant(1, 1); + return Context::instance().buildOpAndSubmitTask(OpTypes::kAdd, opts, {*this, rhs})[0]; // NOLINT +} + +Tensor Tensor::subConstant(Tensor rhs) { + auto opts = aops::SubOpOptions{}; + opts.setInputsConstant(0, 0); + opts.setInputsConstant(1, 1); + return Context::instance().buildOpAndSubmitTask(OpTypes::kSub, opts, {*this, rhs})[0]; // NOLINT +} + +Tensor Tensor::mulConstant(Tensor rhs) { + auto opts = aops::MulOpOptions{}; + opts.setInputsConstant(0, 0); + opts.setInputsConstant(1, 1); + return Context::instance().buildOpAndSubmitTask(OpTypes::kMul, opts, {*this, rhs})[0]; // NOLINT +} + Tensor Tensor::operator/(float rhs) { auto rhs_tensor = Tensor::empty({1}, dtype(), device()).alloc(); if (device() != kCPU) { @@ -485,14 +521,14 @@ size_t Tensor::hash() const { std::vector heap_buf; auto* buf = stack_buf; - size_t count = 1 + attached_views_.size(); + size_t count = 1 + impl_->attachedViews().size(); if (count > kStackCap) { heap_buf.resize(count); buf = heap_buf.data(); } buf[0] = uuid(); size_t idx = 1; 
- for (const auto& [_, view] : attached_views_) { buf[idx++] = view ? view->uuid() : 0u; } + for (const auto& [_, view] : impl_->attachedViews()) { buf[idx++] = view ? view->uuid() : 0u; } return XXH64(buf, count * sizeof(uint32_t), 0); } diff --git a/mllm/core/Tensor.hpp b/mllm/core/Tensor.hpp index 334441501..96a375622 100644 --- a/mllm/core/Tensor.hpp +++ b/mllm/core/Tensor.hpp @@ -175,6 +175,8 @@ class Tensor { */ static Tensor empty(const std::vector& shape, DataTypes dtype = kFloat32, DeviceTypes device = kCPU); + static Tensor constant(float x, DataTypes dtype = kFloat32, DeviceTypes device = kCPU); + /** * @brief Creates an uninitialized tensor with the same shape and attributes as another tensor. * @@ -290,6 +292,10 @@ class Tensor { Tensor sub(float rhs, DataTypes data_type = kFloat32); Tensor mul(float rhs, DataTypes data_type = kFloat32); + Tensor addConstant(Tensor rhs); + Tensor subConstant(Tensor rhs); + Tensor mulConstant(Tensor rhs); + /// @name Scalar Operations with complex rhs type /// Element-wise operations with complex rhs type scalar values. 
/// @{ @@ -692,16 +698,15 @@ class Tensor { return *(const_cast(this)->offsettedPtr(offsets)); } - [[nodiscard]] std::unordered_map& attachedViews() { return attached_views_; } + [[nodiscard]] std::unordered_map& attachedViews() { return impl_->attachedViews(); } - void attach(const std::string& name, const TensorViewImpl::ptr_t& view) { attached_views_[name] = view; } + void attach(const std::string& name, const TensorViewImpl::ptr_t& view) { impl_->attachedViews()[name] = view; } private: template friend __LinkedTensor operator<<(const Tensor& t, T first); std::shared_ptr impl_ = nullptr; - std::unordered_map attached_views_; }; template diff --git a/mllm/core/TensorViewImpl.hpp b/mllm/core/TensorViewImpl.hpp index 536148203..4b7b146b7 100644 --- a/mllm/core/TensorViewImpl.hpp +++ b/mllm/core/TensorViewImpl.hpp @@ -89,12 +89,15 @@ class TensorViewImpl : public std::enable_shared_from_this { inline void dropStorage() { storage_ = nullptr; } + inline std::unordered_map& attachedViews() { return attached_views_; } + private: int32_t shape_len_ = 0; int32_t storage_offset_ = 0; int32_t shape_[MLLM_TENSOR_SHAPE_MAX_LEN]; int32_t stride_[MLLM_TENSOR_SHAPE_MAX_LEN]; std::shared_ptr storage_ = nullptr; + std::unordered_map attached_views_; }; } // namespace mllm diff --git a/mllm/core/aops/ElewiseOps.cpp b/mllm/core/aops/ElewiseOps.cpp index 85c3027f8..1bf8c60dd 100644 --- a/mllm/core/aops/ElewiseOps.cpp +++ b/mllm/core/aops/ElewiseOps.cpp @@ -112,7 +112,6 @@ __MLLM_ELEWISE_OP_IMPL(kAdd, AddOp); __MLLM_ELEWISE_OP_IMPL(kSub, SubOp); __MLLM_ELEWISE_OP_IMPL(kMul, MulOp); __MLLM_ELEWISE_OP_IMPL(kDiv, DivOp); -__MLLM_ELEWISE_OP_IMPL(kNeg, NegOp); // ---------- Unary Ops __MLLM_ELEWISE_UNARY_OP_IMPL(kAbs, AbsOp); @@ -121,6 +120,7 @@ __MLLM_ELEWISE_UNARY_OP_IMPL(kClip, ClipOp); __MLLM_ELEWISE_UNARY_OP_IMPL(kExp, ExpOp); __MLLM_ELEWISE_UNARY_OP_IMPL(kSin, SinOp); __MLLM_ELEWISE_UNARY_OP_IMPL(kCos, CosOp); +__MLLM_ELEWISE_UNARY_OP_IMPL(kNeg, NegOp); } // namespace mllm::aops diff 
--git a/mllm/core/aops/ParamOp.cpp b/mllm/core/aops/ParamOp.cpp index a8e10c3d9..ffa161570 100644 --- a/mllm/core/aops/ParamOp.cpp +++ b/mllm/core/aops/ParamOp.cpp @@ -2,6 +2,8 @@ // Licensed under the MIT License. #include "mllm/core/aops/ParamOp.hpp" +#include "mllm/compile/ir/graph/Op.hpp" +#include "mllm/compile/ir/tensor/Op.hpp" #include "mllm/core/BaseOp.hpp" #include "mllm/core/Tensor.hpp" #include "mllm/utils/Common.hpp" @@ -31,14 +33,16 @@ void ParamOp::load(const ParameterFile::ptr_t& ploader) { void ParamOp::trace(void* trace_context, const std::vector& inputs, std::vector& outputs) { auto ir_ctx = (ir::IRContext*)trace_context; - auto i_irs = ir::tensor::wrapTensors2TensorIR(ir_ctx, inputs); - auto o_irs = ir::tensor::wrapTensors2TensorIR(ir_ctx, outputs); - ir_ctx->create(shared_from_this(), i_irs, o_irs); + // Register Params + if (weight_ && !ir_ctx->lookupSymbolTable(getName())) { + ir::IRWriterGuard guard(ir_ctx, ir_ctx->lookupSymbolTable("init")->cast_()->getTopRegion()); + ir_ctx->create(ir_ctx->create(weight_)); + } } void ParamOp::forward(const std::vector& inputs, std::vector& outputs) { MLLM_EMPTY_SCOPE; } -void ParamOp::reshape(const std::vector& inputs, std::vector& outputs) { MLLM_EMPTY_SCOPE; } +void ParamOp::reshape(const std::vector& inputs, std::vector& outputs) { outputs.emplace_back(weight_); } void ParamOp::setup(const std::vector& inputs, std::vector& outputs) { MLLM_EMPTY_SCOPE; } diff --git a/mllm/core/aops/SigmoidOp.cpp b/mllm/core/aops/SigmoidOp.cpp new file mode 100644 index 000000000..a57d89255 --- /dev/null +++ b/mllm/core/aops/SigmoidOp.cpp @@ -0,0 +1,37 @@ +// Copyright (c) MLLM Team. +// Licensed under the MIT License. 
+ +#include "mllm/core/aops/SigmoidOp.hpp" +#include "mllm/core/BaseOp.hpp" +#include "mllm/core/Tensor.hpp" +#include "mllm/utils/Common.hpp" +#include "mllm/compile/ir/linalg/Op.hpp" + +namespace mllm::aops { + +SigmoidOp::SigmoidOp(const SigmoidOpOptions& options) : BaseOp(OpTypes::kSigmoid), options_(options) {} + +void SigmoidOp::load(const ParameterFile::ptr_t& ploader) { MLLM_EMPTY_SCOPE; } + +void SigmoidOp::trace(void* trace_context, const std::vector& inputs, std::vector& outputs) { + auto ir_ctx = (ir::IRContext*)trace_context; + auto i_irs = ir::tensor::wrapTensors2TensorIR(ir_ctx, inputs); + auto o_irs = ir::tensor::wrapTensors2TensorIR(ir_ctx, outputs); + ir_ctx->create(shared_from_this(), i_irs, o_irs); +} + +void SigmoidOp::forward(const std::vector& inputs, std::vector& outputs) { + NYI("SigmoidOp::forward not implemented in aops base."); +} + +void SigmoidOp::reshape(const std::vector& inputs, std::vector& outputs) { + if (options_.isInplace()) { + outputs.emplace_back(inputs[0]); + } else { + outputs.emplace_back(Tensor::empty(inputs[0].shape(), inputs[0].dtype(), inputs[0].device())); + } +} + +void SigmoidOp::setup(const std::vector& inputs, std::vector& outputs) { BaseOp::setup(inputs, outputs); } + +} // namespace mllm::aops diff --git a/mllm/core/aops/SigmoidOp.hpp b/mllm/core/aops/SigmoidOp.hpp new file mode 100644 index 000000000..29c5651d5 --- /dev/null +++ b/mllm/core/aops/SigmoidOp.hpp @@ -0,0 +1,33 @@ +// Copyright (c) MLLM Team. +// Licensed under the MIT License. 
+ +#pragma once + +#include "mllm/core/BaseOp.hpp" +#include "mllm/core/ParameterFile.hpp" + +namespace mllm::aops { + +struct SigmoidOpOptions : public BaseOpOptions {}; + +class SigmoidOp : public BaseOp { + public: + explicit SigmoidOp(const SigmoidOpOptions& options); + + void load(const ParameterFile::ptr_t& ploader) override; + + void trace(void* trace_context, const std::vector& inputs, std::vector& outputs) override; + + void forward(const std::vector& inputs, std::vector& outputs) override; + + void reshape(const std::vector& inputs, std::vector& outputs) override; + + void setup(const std::vector& inputs, std::vector& outputs) override; + + inline SigmoidOpOptions& options() { return options_; } + + protected: + SigmoidOpOptions options_; +}; + +} // namespace mllm::aops diff --git a/mllm/nn/Functional.cpp b/mllm/nn/Functional.cpp index c863ee169..ab0c12f80 100644 --- a/mllm/nn/Functional.cpp +++ b/mllm/nn/Functional.cpp @@ -8,6 +8,7 @@ #include "mllm/core/aops/MatMulOp.hpp" #include "mllm/core/aops/ReduceOps.hpp" #include "mllm/core/aops/Scatter2ShardsOp.hpp" +#include "mllm/core/aops/SigmoidOp.hpp" #include "mllm/core/aops/SoftmaxOp.hpp" #include "mllm/core/aops/ElewiseOps.hpp" #include "mllm/core/aops/SplitOp.hpp" @@ -205,4 +206,9 @@ mllm::Tensor where(const Tensor& mask, const Tensor& original, const Tensor& v) return ctx.buildOpAndSubmitTask(OpTypes::kWhere, aops::WhereOpOptions{}, {mask, original, v})[0]; } +mllm::Tensor sigmoid(const Tensor& x) { + auto& ctx = mllm::Context::instance(); + return ctx.buildOpAndSubmitTask(OpTypes::kSigmoid, aops::SigmoidOpOptions{}, {x})[0]; +} + } // namespace mllm::nn::functional diff --git a/mllm/nn/Functional.hpp b/mllm/nn/Functional.hpp index 1a2f6d3df..bd0cca9dd 100644 --- a/mllm/nn/Functional.hpp +++ b/mllm/nn/Functional.hpp @@ -160,4 +160,6 @@ mllm::Tensor radixAttnRelax(const mllm::Tensor& Q, const mllm::Tensor& K_idx, co mllm::Tensor where(const Tensor& mask, const Tensor& original, const Tensor& v); 
+mllm::Tensor sigmoid(const Tensor& x); + } // namespace mllm::nn::functional diff --git a/mllm/nn/Module.cpp b/mllm/nn/Module.cpp index d8328bb9e..55b079ac7 100644 --- a/mllm/nn/Module.cpp +++ b/mllm/nn/Module.cpp @@ -22,7 +22,7 @@ void ModuleImpl::load(const ParameterFile::ptr_t& param_file) { case AbstractNnNodeTypes::kLayer: std::static_pointer_cast(hb)->load(param_file); break; } } - resources_mapped_files_.push_back(param_file->getMappedFile()); + resources_mapped_files_.push_back(param_file); } ParameterFile::ptr_t ModuleImpl::params(ModelFileVersion v) { @@ -75,6 +75,11 @@ Tensor ModuleImpl::getBuffer(const std::string& name) { return buffer_[name]; } void ModuleImpl::updateBuffer(const std::string& name, const Tensor& tensor) { buffer_[name] = tensor; } +ParameterFile::ptr_t ModuleImpl::getTopParameterFile() { + if (resources_mapped_files_.empty()) { return nullptr; } + return resources_mapped_files_.back(); +} + Module::Module() { impl_ = std::make_shared(); impl()->setName(""); @@ -93,6 +98,8 @@ ModuleImpl::ptr_t Module::impl() const { return impl_; } void Module::to(DeviceTypes device_type) { impl()->to(device_type); } +ParameterFile::ptr_t Module::getTopParameterFile() { return impl_->getTopParameterFile(); } + void Module::load(const ParameterFile::ptr_t& param_file) { impl_->load(param_file); } std::vector Module::forward(const std::vector& inputs, const std::vector& args) { return {}; } diff --git a/mllm/nn/Module.hpp b/mllm/nn/Module.hpp index 79e79dbb0..4965aa6a5 100644 --- a/mllm/nn/Module.hpp +++ b/mllm/nn/Module.hpp @@ -37,10 +37,12 @@ class ModuleImpl : public AbstractNnNode { void updateBuffer(const std::string& name, const Tensor& tensor); + ParameterFile::ptr_t getTopParameterFile(); + private: /// Buffer is tensors that will not shown in params. And will not be saved. 
SymbolTable buffer_; - std::vector resources_mapped_files_; + std::vector resources_mapped_files_; }; template @@ -66,6 +68,8 @@ class Module { [[nodiscard]] DeviceTypes device() const { return impl_->getDevice(); } + ParameterFile::ptr_t getTopParameterFile(); + /** * @brief Register a module/layer into this module * diff --git a/pymllm/backends/qualcomm/transformers/core/qdq.py b/pymllm/backends/qualcomm/transformers/core/qdq.py index c7bc351de..ce67729f4 100644 --- a/pymllm/backends/qualcomm/transformers/core/qdq.py +++ b/pymllm/backends/qualcomm/transformers/core/qdq.py @@ -5,46 +5,70 @@ class ActivationQDQ(nn.Module): """ - General activation value pseudo-quantization module (QDQ). - Supports symmetric Per-Tensor quantization, configurable bit numbers (e.g., 8-bit or 16-bit). + General activation Quantization-DeQuantization (QDQ) module. + Supports both Symmetric and Asymmetric (Affine) quantization. + Uses torch.qint32 as a unified type to support various bit-widths. """ - def __init__(self, bits=8, qscheme=torch.per_tensor_symmetric): + def __init__(self, bits=8, qscheme=torch.per_tensor_affine): super().__init__() + self.bits = bits + self.qscheme = qscheme - # 1. Calculate quantization range based on bits - # int8: -128 to 127 - # int16: -32768 to 32767 - self.quant_min = -(2 ** (bits - 1)) - self.quant_max = 2 ** (bits - 1) - 1 + # Define the simulation dtype as qint32 to avoid overflow across different bit-widths + self.dtype = torch.qint32 + + # 1. Calculate quantization range based on bits and scheme + if qscheme in [torch.per_tensor_symmetric, torch.per_channel_symmetric]: + # Symmetric: range is [-(2^(bits-1)), 2^(bits-1) - 1] + # e.g., 8-bit: -128 to 127 + self.quant_min = -(2 ** (bits - 1)) + self.quant_max = 2 ** (bits - 1) - 1 + else: + # Asymmetric (Affine): range is [0, 2^bits - 1] + # e.g., 8-bit: 0 to 255 + self.quant_min = 0 + self.quant_max = (2**bits) - 1 # 2. 
Initialize FakeQuantize - # For activations, typically use MinMaxObserver or MovingAverageMinMaxObserver + # MinMaxObserver calculates scale and zero_point based on observed tensors. + # Passing quant_min/max to the observer ensures consistency. self.fake_quant = FakeQuantize( - observer=MinMaxObserver.with_args(qscheme=qscheme, dtype=torch.qint32), + observer=MinMaxObserver.with_args( + qscheme=self.qscheme, + dtype=self.dtype, + quant_min=self.quant_min, + quant_max=self.quant_max, + reduce_range=False, + ), quant_min=self.quant_min, quant_max=self.quant_max, - dtype=torch.qint32, - qscheme=qscheme, + dtype=self.dtype, + qscheme=self.qscheme, ) def forward(self, x): - # Directly apply pseudo-quantization. - # When observer is enabled, it continuously updates scale/zp; - # When fakequant is enabled, it simulates quantization errors. + # Applies fake quantization: rounds to nearest integer and clamps to [min, max], + # then dequantizes back to float to simulate quantization noise. return self.fake_quant(x) + # Control methods for quantization-aware training (QAT) def enable_observer(self): + """Enable tracking of min/max values to update scale and zero_point.""" self.fake_quant.enable_observer() def disable_observer(self): + """Freeze scale and zero_point calculation.""" self.fake_quant.disable_observer() def enable_fakequant(self): + """Enable simulation of quantization error.""" self.fake_quant.enable_fakequant() def disable_fakequant(self): + """Disable quantization simulation (act as identity).""" self.fake_quant.disable_fakequant() def extra_repr(self): - return f"bits={self.quant_max.bit_length() + 1}, q_range=({self.quant_min}, {self.quant_max})" + mode = "Symmetric" if "symmetric" in str(self.qscheme) else "Asymmetric" + return f"bits={self.bits}, mode={mode}, q_range=({self.quant_min}, {self.quant_max}), dtype={self.dtype}" diff --git a/pymllm/backends/qualcomm/transformers/qwen3/modeling_qwen3.py 
b/pymllm/backends/qualcomm/transformers/qwen3/modeling_qwen3.py index f06019f2a..5148684af 100644 --- a/pymllm/backends/qualcomm/transformers/qwen3/modeling_qwen3.py +++ b/pymllm/backends/qualcomm/transformers/qwen3/modeling_qwen3.py @@ -206,11 +206,19 @@ def __init__(self, config: Qwen3Config, layer_idx: int): self.k_rope_mul_0_output_qdq = ActivationQDQ(bits=16) self.k_rope_mul_1_output_qdq = ActivationQDQ(bits=16) self.k_rope_add_0_output_qdq = ActivationQDQ(bits=16) - self.k_cast_to_int8_qdq = ActivationQDQ(bits=8) - self.v_cast_to_int8_qdq = ActivationQDQ(bits=8) + + # In qnn, is uint8 sym. + self.k_cast_to_int8_qdq = ActivationQDQ( + bits=8, qscheme=torch.per_tensor_symmetric + ) + self.v_cast_to_int8_qdq = ActivationQDQ( + bits=8, qscheme=torch.per_tensor_symmetric + ) + self.v_cast_to_int16_qdq = ActivationQDQ(bits=16) self.qk_matmul_output_qdq = ActivationQDQ(bits=16) self.scaling_qdq = ActivationQDQ(bits=16) + self.neg_20_qdq = ActivationQDQ(bits=16) self.reduce_min_output_qdq = ActivationQDQ(bits=16) self.mul_0_output_qdq = ActivationQDQ(bits=16) self.minus_0_output_qdq = ActivationQDQ(bits=16) @@ -281,7 +289,12 @@ def forward( attn_min = self.reduce_min_output_qdq( torch.amin(attn_weights, dim=-1, keepdim=True) ) - attn_vv = self.minus_0_output_qdq(attn_min - 20) + attn_vv = self.minus_0_output_qdq( + attn_min + + self.neg_20_qdq( + torch.ones(1, dtype=torch.bfloat16, device=value_states.device) * (-20) + ) + ) attn_weights = torch.where(attention_mask == 0, attn_weights, attn_vv) attn_weights = self.softmax_output_qdq( @@ -589,8 +602,8 @@ def __init__(self, config): super().__init__(config) self.model = Qwen3Model(config) self.vocab_size = config.vocab_size - self.lm_head = QLinearW8A16_PerChannelSym( - config.hidden_size, config.vocab_size, bias=False + self.lm_head = QLinearLPBQ( + config.hidden_size, config.vocab_size, bias=False, block_size=32 ) self.mllm_qualcomm_max_length = None diff --git a/pymllm/backends/qualcomm/transformers/qwen3/train.py 
b/pymllm/backends/qualcomm/transformers/qwen3/train.py index 746970020..8432e4812 100644 --- a/pymllm/backends/qualcomm/transformers/qwen3/train.py +++ b/pymllm/backends/qualcomm/transformers/qwen3/train.py @@ -1,4 +1,5 @@ import os +import torch import argparse from safetensors.torch import save_model from pymllm.backends.qualcomm.transformers.qwen3.runner import Qwen3Quantizer @@ -39,6 +40,9 @@ def main(): m.calibrate(num_samples=args.num_samples, max_seq_length=args.max_length) # m.compile() m.infer(args.infer_text) + m.model.lm_head.weight = torch.nn.Parameter( + m.model.model.embed_tokens.weight.clone() + ) os.makedirs(args.output_dir, exist_ok=True) model_save_path = os.path.join(args.output_dir, "model.safetensors") diff --git a/pymllm/quantize/pipeline.py b/pymllm/quantize/pipeline.py index 71da013c6..288187fc5 100644 --- a/pymllm/quantize/pipeline.py +++ b/pymllm/quantize/pipeline.py @@ -20,9 +20,15 @@ def build_cast2fp32_pipeline() -> QuantizeSolver: return ret +def build_raw_pipeline() -> QuantizeSolver: + ret = QuantizeSolver() + return ret + + BUILTIN_QUANTIZE_PIPELINE: Dict = { "w4a32_kai_pipeline": build_w4a32_kai_pipeline, "cast2fp32_pipeline": build_cast2fp32_pipeline, + "_raw": build_raw_pipeline, } BUILTIN_QUANTIZE_PASS: Dict = { "w4a32_kai": W4A32KAIQuantizePass, diff --git a/pymllm/utils/mllm_convertor.py b/pymllm/utils/mllm_convertor.py index 7b1aabfb0..d5e8a5c2f 100644 --- a/pymllm/utils/mllm_convertor.py +++ b/pymllm/utils/mllm_convertor.py @@ -66,6 +66,24 @@ def main(): cast_left_2_fp32=True, verbose=args.verbose, ) + elif args.cfg_path is None and args.pipeline is None and args.format == "v2": + cfg = None + pipeline: QuantizeSolver = BUILTIN_QUANTIZE_PIPELINE["_raw"]() + old_param_size = len(params) + new_param_size = pipeline.stream_quantize_params_size(cfg, params) + print(f"Params Num: Before: {old_param_size}, After: {new_param_size}") + pipeline.stream_quantize( + cfg, + params, + writer=ModelFileV2( + args.output_path, + 
args.model_name, + "Streaming", + max_params_descriptor_buffer_num=new_param_size, + ), + cast_left_2_fp32=False, + verbose=args.verbose, + ) elif ( args.cfg_path is not None and args.pipeline is not None and args.format == "v2" ): From 6d7b5b98e4185614d360ec113c7e4adf62689af3 Mon Sep 17 00:00:00 2001 From: chenghuaWang <2923277184@qq.com> Date: Mon, 5 Jan 2026 03:31:46 +0000 Subject: [PATCH 09/13] fix: tensor attaching view selective hashing --- .../qwen3_qnn_aot/modeling_qwen_qnn_aot.hpp | 21 +- examples/qwen3_qnn_aot/qwen3_qnn_aot.mir | 3921 +++++++++-------- .../qwen3_qnn_aot_quant_recipe.mir | 3508 +++++++-------- mllm/backends/qnn/QNNUtils.hpp | 4 +- mllm/backends/qnn/aot/passes/AOTPipeline.cpp | 4 +- mllm/compile/ir/linalg/Attribute.cpp | 2 + mllm/core/Tensor.cpp | 17 +- mllm/core/Tensor.hpp | 8 +- mllm/core/TensorViewImpl.hpp | 6 +- 9 files changed, 3818 insertions(+), 3673 deletions(-) diff --git a/examples/qwen3_qnn_aot/modeling_qwen_qnn_aot.hpp b/examples/qwen3_qnn_aot/modeling_qwen_qnn_aot.hpp index 1f0da38e7..5677d27f2 100644 --- a/examples/qwen3_qnn_aot/modeling_qwen_qnn_aot.hpp +++ b/examples/qwen3_qnn_aot/modeling_qwen_qnn_aot.hpp @@ -41,8 +41,8 @@ Tensor QDQ(nn::Module* m, Tensor in, const std::string& qdq_name_in_pytorch) { case kUInt16PerTensorAsy: { auto scale = m->getTopParameterFile()->pull(scale_name); auto zp = m->getTopParameterFile()->pull(zp_name); - in.attach("scale", scale.impl()); - in.attach("zero_point", zp.impl()); + in.attach("scale", scale.impl(), true); + in.attach("zero_point", zp.impl(), true); break; } // For Constant! 
@@ -51,8 +51,8 @@ Tensor QDQ(nn::Module* m, Tensor in, const std::string& qdq_name_in_pytorch) { MLLM_RT_ASSERT_EQ(in.size(-1), 1); auto scale = m->getTopParameterFile()->pull(scale_name); auto zp = m->getTopParameterFile()->pull(zp_name); - in.attach("scale", scale.impl()); - in.attach("zero_point", zp.impl()); + in.attach("scale", scale.impl(), true); + in.attach("zero_point", zp.impl(), true); break; } default: { @@ -76,8 +76,8 @@ Tensor QDQ_KV(nn::Module* m, Tensor in, const std::string& qdq_name_in_pytorch) // Is 128! not 127! auto new_zp = Tensor::constant(128, kInt32).setName(zp_name).setMemType(kParamsNormal); - in.attach("scale", scale.impl()); - in.attach("zero_point", new_zp.impl()); + in.attach("scale", scale.impl(), true); + in.attach("zero_point", new_zp.impl(), true); break; } default: { @@ -372,7 +372,6 @@ class Qwen3Text final : public nn::Module { auto position_ids = inputs[1]; auto causal_mask = inputs[2]; - position_ids = position_ids.squeeze(0); auto llm_embedding_sin = rope_sin_()[{{0}, position_ids, {kAll}}]; auto llm_embedding_cos = rope_cos_()[{{0}, position_ids, {kAll}}]; @@ -459,13 +458,13 @@ class Qwen3ForCausalLM : public ARGeneration, public nn::Module { // For decode phase, increment the last position if (seq_len == 1) { - auto last_pos = *position_ids.offsettedPtr({0, position_ids.shape()[1] - 1}); - position_ids = Tensor::empty({batch_size, 1}, kInt32, kCPU).alloc(); - *position_ids.offsettedPtr({0, 0}) = last_pos + 1; + auto last_pos = *position_ids.offsettedPtr({position_ids.shape()[1] - 1}); + position_ids = Tensor::empty({1}, kInt32, kCPU).alloc(); + *position_ids.offsettedPtr({0}) = last_pos + 1; } } else { // Generate position_ids for prefill phase - position_ids = Tensor::empty({batch_size, seq_len}, kInt32, kCPU).alloc(); + position_ids = Tensor::empty({seq_len}, kInt32, kCPU).alloc(); auto position_ids_ptr = position_ids.ptr(); for (int s = 0; s < seq_len; ++s) { position_ids_ptr[s] = s; } } diff --git 
a/examples/qwen3_qnn_aot/qwen3_qnn_aot.mir b/examples/qwen3_qnn_aot/qwen3_qnn_aot.mir index 1caff3b4a..200c4982b 100644 --- a/examples/qwen3_qnn_aot/qwen3_qnn_aot.mir +++ b/examples/qwen3_qnn_aot/qwen3_qnn_aot.mir @@ -1,319 +1,319 @@ @main () -> () { graph.SubGraphOp @init [symbol:init] { () -> () { - tensor.CPU.register () -> (%7516:tensor<[151936, 2048], Float32, CPU>[@model.embed_tokens.weight][quant_recipe:QuantSpec(Raw(type: Float32), uuid=61), symbol:model.embed_tokens.weight])[symbol:model.embed_tokens.weight] - tensor.CPU.register () -> (%8011:tensor<[1, 1024, 128], Int16PerTensor, CPU>[@rope_sin][symbol:rope_sin])[symbol:rope_sin] - tensor.CPU.register () -> (%8012:tensor<[1, 1024, 128], Int16PerTensor, CPU>[@rope_cos][symbol:rope_cos])[symbol:rope_cos] - tensor.CPU.register () -> (%6662:tensor<[2048], Float32, CPU>[@model.layers.0.input_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=67), symbol:model.layers.0.input_layernorm.weight])[symbol:model.layers.0.input_layernorm.weight] - tensor.CPU.register () -> (%7778:tensor<[2048, 2048], Float32, CPU>[@model.layers.0.self_attn.q_proj.weight][symbol:model.layers.0.self_attn.q_proj.weight])[symbol:model.layers.0.self_attn.q_proj.weight] - tensor.CPU.register () -> (%61:tensor<[1024, 2048], Float32, CPU>[@model.layers.0.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=68), symbol:model.layers.0.self_attn.k_proj.weight])[symbol:model.layers.0.self_attn.k_proj.weight] - tensor.CPU.register () -> (%5178:tensor<[1024, 2048], Float32, CPU>[@model.layers.0.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=70), 
symbol:model.layers.0.self_attn.v_proj.weight])[symbol:model.layers.0.self_attn.v_proj.weight] - tensor.CPU.register () -> (%1867:tensor<[128], Float32, CPU>[@model.layers.0.self_attn.q_norm.weight][quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=74), symbol:model.layers.0.self_attn.q_norm.weight])[symbol:model.layers.0.self_attn.q_norm.weight] - tensor.CPU.register () -> (%7469:tensor<[128], Float32, CPU>[@model.layers.0.self_attn.k_norm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=76), symbol:model.layers.0.self_attn.k_norm.weight])[symbol:model.layers.0.self_attn.k_norm.weight] - tensor.CPU.register () -> (%7880:tensor<[2048, 2048], Float32, CPU>[@model.layers.0.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=89), symbol:model.layers.0.self_attn.o_proj.weight])[symbol:model.layers.0.self_attn.o_proj.weight] - tensor.CPU.register () -> (%3163:tensor<[2048], Float32, CPU>[@model.layers.0.post_attention_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=92), symbol:model.layers.0.post_attention_layernorm.weight])[symbol:model.layers.0.post_attention_layernorm.weight] - tensor.CPU.register () -> (%3038:tensor<[6144, 2048], Float32, CPU>[@model.layers.0.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=93), symbol:model.layers.0.mlp.gate_proj.weight])[symbol:model.layers.0.mlp.gate_proj.weight] - tensor.CPU.register () -> (%184:tensor<[6144, 2048], Float32, CPU>[@model.layers.0.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, 
quant_to_type: UInt4, scale_1_type: Float32), uuid=96), symbol:model.layers.0.mlp.up_proj.weight])[symbol:model.layers.0.mlp.up_proj.weight] - tensor.CPU.register () -> (%7449:tensor<[2048, 6144], Float32, CPU>[@model.layers.0.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=98), symbol:model.layers.0.mlp.down_proj.weight])[symbol:model.layers.0.mlp.down_proj.weight] - tensor.CPU.register () -> (%3526:tensor<[2048], Float32, CPU>[@model.layers.1.input_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=101), symbol:model.layers.1.input_layernorm.weight])[symbol:model.layers.1.input_layernorm.weight] - tensor.CPU.register () -> (%2471:tensor<[2048, 2048], Float32, CPU>[@model.layers.1.self_attn.q_proj.weight][symbol:model.layers.1.self_attn.q_proj.weight])[symbol:model.layers.1.self_attn.q_proj.weight] - tensor.CPU.register () -> (%5492:tensor<[1024, 2048], Float32, CPU>[@model.layers.1.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=102), symbol:model.layers.1.self_attn.k_proj.weight])[symbol:model.layers.1.self_attn.k_proj.weight] - tensor.CPU.register () -> (%554:tensor<[1024, 2048], Float32, CPU>[@model.layers.1.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=104), symbol:model.layers.1.self_attn.v_proj.weight])[symbol:model.layers.1.self_attn.v_proj.weight] - tensor.CPU.register () -> (%5159:tensor<[128], Float32, CPU>[@model.layers.1.self_attn.q_norm.weight][quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=108), 
symbol:model.layers.1.self_attn.q_norm.weight])[symbol:model.layers.1.self_attn.q_norm.weight] - tensor.CPU.register () -> (%6337:tensor<[128], Float32, CPU>[@model.layers.1.self_attn.k_norm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=110), symbol:model.layers.1.self_attn.k_norm.weight])[symbol:model.layers.1.self_attn.k_norm.weight] - tensor.CPU.register () -> (%3431:tensor<[2048, 2048], Float32, CPU>[@model.layers.1.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=123), symbol:model.layers.1.self_attn.o_proj.weight])[symbol:model.layers.1.self_attn.o_proj.weight] - tensor.CPU.register () -> (%7183:tensor<[2048], Float32, CPU>[@model.layers.1.post_attention_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=126), symbol:model.layers.1.post_attention_layernorm.weight])[symbol:model.layers.1.post_attention_layernorm.weight] - tensor.CPU.register () -> (%6960:tensor<[6144, 2048], Float32, CPU>[@model.layers.1.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=127), symbol:model.layers.1.mlp.gate_proj.weight])[symbol:model.layers.1.mlp.gate_proj.weight] - tensor.CPU.register () -> (%7251:tensor<[6144, 2048], Float32, CPU>[@model.layers.1.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=130), symbol:model.layers.1.mlp.up_proj.weight])[symbol:model.layers.1.mlp.up_proj.weight] - tensor.CPU.register () -> (%6256:tensor<[2048, 6144], Float32, 
CPU>[@model.layers.1.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=132), symbol:model.layers.1.mlp.down_proj.weight])[symbol:model.layers.1.mlp.down_proj.weight] - tensor.CPU.register () -> (%7411:tensor<[2048], Float32, CPU>[@model.layers.2.input_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=135), symbol:model.layers.2.input_layernorm.weight])[symbol:model.layers.2.input_layernorm.weight] - tensor.CPU.register () -> (%4879:tensor<[2048, 2048], Float32, CPU>[@model.layers.2.self_attn.q_proj.weight][symbol:model.layers.2.self_attn.q_proj.weight])[symbol:model.layers.2.self_attn.q_proj.weight] - tensor.CPU.register () -> (%725:tensor<[1024, 2048], Float32, CPU>[@model.layers.2.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=136), symbol:model.layers.2.self_attn.k_proj.weight])[symbol:model.layers.2.self_attn.k_proj.weight] - tensor.CPU.register () -> (%2701:tensor<[1024, 2048], Float32, CPU>[@model.layers.2.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=138), symbol:model.layers.2.self_attn.v_proj.weight])[symbol:model.layers.2.self_attn.v_proj.weight] - tensor.CPU.register () -> (%7660:tensor<[128], Float32, CPU>[@model.layers.2.self_attn.q_norm.weight][quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=142), symbol:model.layers.2.self_attn.q_norm.weight])[symbol:model.layers.2.self_attn.q_norm.weight] - tensor.CPU.register () -> (%5749:tensor<[128], Float32, CPU>[@model.layers.2.self_attn.k_norm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: 
-32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=144), symbol:model.layers.2.self_attn.k_norm.weight])[symbol:model.layers.2.self_attn.k_norm.weight] - tensor.CPU.register () -> (%1525:tensor<[2048, 2048], Float32, CPU>[@model.layers.2.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=157), symbol:model.layers.2.self_attn.o_proj.weight])[symbol:model.layers.2.self_attn.o_proj.weight] - tensor.CPU.register () -> (%6444:tensor<[2048], Float32, CPU>[@model.layers.2.post_attention_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=160), symbol:model.layers.2.post_attention_layernorm.weight])[symbol:model.layers.2.post_attention_layernorm.weight] - tensor.CPU.register () -> (%3201:tensor<[6144, 2048], Float32, CPU>[@model.layers.2.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=161), symbol:model.layers.2.mlp.gate_proj.weight])[symbol:model.layers.2.mlp.gate_proj.weight] - tensor.CPU.register () -> (%4120:tensor<[6144, 2048], Float32, CPU>[@model.layers.2.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=164), symbol:model.layers.2.mlp.up_proj.weight])[symbol:model.layers.2.mlp.up_proj.weight] - tensor.CPU.register () -> (%1962:tensor<[2048, 6144], Float32, CPU>[@model.layers.2.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=166), symbol:model.layers.2.mlp.down_proj.weight])[symbol:model.layers.2.mlp.down_proj.weight] 
- tensor.CPU.register () -> (%3250:tensor<[2048], Float32, CPU>[@model.layers.3.input_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=169), symbol:model.layers.3.input_layernorm.weight])[symbol:model.layers.3.input_layernorm.weight] - tensor.CPU.register () -> (%5564:tensor<[2048, 2048], Float32, CPU>[@model.layers.3.self_attn.q_proj.weight][symbol:model.layers.3.self_attn.q_proj.weight])[symbol:model.layers.3.self_attn.q_proj.weight] - tensor.CPU.register () -> (%3502:tensor<[1024, 2048], Float32, CPU>[@model.layers.3.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=170), symbol:model.layers.3.self_attn.k_proj.weight])[symbol:model.layers.3.self_attn.k_proj.weight] - tensor.CPU.register () -> (%2402:tensor<[1024, 2048], Float32, CPU>[@model.layers.3.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=172), symbol:model.layers.3.self_attn.v_proj.weight])[symbol:model.layers.3.self_attn.v_proj.weight] - tensor.CPU.register () -> (%1747:tensor<[128], Float32, CPU>[@model.layers.3.self_attn.q_norm.weight][quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=176), symbol:model.layers.3.self_attn.q_norm.weight])[symbol:model.layers.3.self_attn.q_norm.weight] - tensor.CPU.register () -> (%4846:tensor<[128], Float32, CPU>[@model.layers.3.self_attn.k_norm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=178), symbol:model.layers.3.self_attn.k_norm.weight])[symbol:model.layers.3.self_attn.k_norm.weight] - tensor.CPU.register () -> (%3109:tensor<[2048, 2048], Float32, 
CPU>[@model.layers.3.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=191), symbol:model.layers.3.self_attn.o_proj.weight])[symbol:model.layers.3.self_attn.o_proj.weight] - tensor.CPU.register () -> (%7221:tensor<[2048], Float32, CPU>[@model.layers.3.post_attention_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=194), symbol:model.layers.3.post_attention_layernorm.weight])[symbol:model.layers.3.post_attention_layernorm.weight] - tensor.CPU.register () -> (%7181:tensor<[6144, 2048], Float32, CPU>[@model.layers.3.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=195), symbol:model.layers.3.mlp.gate_proj.weight])[symbol:model.layers.3.mlp.gate_proj.weight] - tensor.CPU.register () -> (%2714:tensor<[6144, 2048], Float32, CPU>[@model.layers.3.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=198), symbol:model.layers.3.mlp.up_proj.weight])[symbol:model.layers.3.mlp.up_proj.weight] - tensor.CPU.register () -> (%4573:tensor<[2048, 6144], Float32, CPU>[@model.layers.3.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=200), symbol:model.layers.3.mlp.down_proj.weight])[symbol:model.layers.3.mlp.down_proj.weight] - tensor.CPU.register () -> (%5536:tensor<[2048], Float32, CPU>[@model.layers.4.input_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=203), 
symbol:model.layers.4.input_layernorm.weight])[symbol:model.layers.4.input_layernorm.weight] - tensor.CPU.register () -> (%463:tensor<[2048, 2048], Float32, CPU>[@model.layers.4.self_attn.q_proj.weight][symbol:model.layers.4.self_attn.q_proj.weight])[symbol:model.layers.4.self_attn.q_proj.weight] - tensor.CPU.register () -> (%5989:tensor<[1024, 2048], Float32, CPU>[@model.layers.4.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=204), symbol:model.layers.4.self_attn.k_proj.weight])[symbol:model.layers.4.self_attn.k_proj.weight] - tensor.CPU.register () -> (%3443:tensor<[1024, 2048], Float32, CPU>[@model.layers.4.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=206), symbol:model.layers.4.self_attn.v_proj.weight])[symbol:model.layers.4.self_attn.v_proj.weight] - tensor.CPU.register () -> (%926:tensor<[128], Float32, CPU>[@model.layers.4.self_attn.q_norm.weight][quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=210), symbol:model.layers.4.self_attn.q_norm.weight])[symbol:model.layers.4.self_attn.q_norm.weight] - tensor.CPU.register () -> (%5648:tensor<[128], Float32, CPU>[@model.layers.4.self_attn.k_norm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=212), symbol:model.layers.4.self_attn.k_norm.weight])[symbol:model.layers.4.self_attn.k_norm.weight] - tensor.CPU.register () -> (%256:tensor<[2048, 2048], Float32, CPU>[@model.layers.4.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=225), 
symbol:model.layers.4.self_attn.o_proj.weight])[symbol:model.layers.4.self_attn.o_proj.weight] - tensor.CPU.register () -> (%3101:tensor<[2048], Float32, CPU>[@model.layers.4.post_attention_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=228), symbol:model.layers.4.post_attention_layernorm.weight])[symbol:model.layers.4.post_attention_layernorm.weight] - tensor.CPU.register () -> (%15:tensor<[6144, 2048], Float32, CPU>[@model.layers.4.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=229), symbol:model.layers.4.mlp.gate_proj.weight])[symbol:model.layers.4.mlp.gate_proj.weight] - tensor.CPU.register () -> (%3494:tensor<[6144, 2048], Float32, CPU>[@model.layers.4.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=232), symbol:model.layers.4.mlp.up_proj.weight])[symbol:model.layers.4.mlp.up_proj.weight] - tensor.CPU.register () -> (%6518:tensor<[2048, 6144], Float32, CPU>[@model.layers.4.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=234), symbol:model.layers.4.mlp.down_proj.weight])[symbol:model.layers.4.mlp.down_proj.weight] - tensor.CPU.register () -> (%7246:tensor<[2048], Float32, CPU>[@model.layers.5.input_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=237), symbol:model.layers.5.input_layernorm.weight])[symbol:model.layers.5.input_layernorm.weight] - tensor.CPU.register () -> (%3752:tensor<[2048, 2048], Float32, 
CPU>[@model.layers.5.self_attn.q_proj.weight][symbol:model.layers.5.self_attn.q_proj.weight])[symbol:model.layers.5.self_attn.q_proj.weight] - tensor.CPU.register () -> (%2143:tensor<[1024, 2048], Float32, CPU>[@model.layers.5.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=238), symbol:model.layers.5.self_attn.k_proj.weight])[symbol:model.layers.5.self_attn.k_proj.weight] - tensor.CPU.register () -> (%5753:tensor<[1024, 2048], Float32, CPU>[@model.layers.5.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=240), symbol:model.layers.5.self_attn.v_proj.weight])[symbol:model.layers.5.self_attn.v_proj.weight] - tensor.CPU.register () -> (%4774:tensor<[128], Float32, CPU>[@model.layers.5.self_attn.q_norm.weight][quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=244), symbol:model.layers.5.self_attn.q_norm.weight])[symbol:model.layers.5.self_attn.q_norm.weight] - tensor.CPU.register () -> (%1215:tensor<[128], Float32, CPU>[@model.layers.5.self_attn.k_norm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=246), symbol:model.layers.5.self_attn.k_norm.weight])[symbol:model.layers.5.self_attn.k_norm.weight] - tensor.CPU.register () -> (%2076:tensor<[2048, 2048], Float32, CPU>[@model.layers.5.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=259), symbol:model.layers.5.self_attn.o_proj.weight])[symbol:model.layers.5.self_attn.o_proj.weight] - tensor.CPU.register () -> (%6883:tensor<[2048], Float32, 
CPU>[@model.layers.5.post_attention_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=262), symbol:model.layers.5.post_attention_layernorm.weight])[symbol:model.layers.5.post_attention_layernorm.weight] - tensor.CPU.register () -> (%5485:tensor<[6144, 2048], Float32, CPU>[@model.layers.5.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=263), symbol:model.layers.5.mlp.gate_proj.weight])[symbol:model.layers.5.mlp.gate_proj.weight] - tensor.CPU.register () -> (%759:tensor<[6144, 2048], Float32, CPU>[@model.layers.5.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=266), symbol:model.layers.5.mlp.up_proj.weight])[symbol:model.layers.5.mlp.up_proj.weight] - tensor.CPU.register () -> (%6315:tensor<[2048, 6144], Float32, CPU>[@model.layers.5.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=268), symbol:model.layers.5.mlp.down_proj.weight])[symbol:model.layers.5.mlp.down_proj.weight] - tensor.CPU.register () -> (%7090:tensor<[2048], Float32, CPU>[@model.layers.6.input_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=271), symbol:model.layers.6.input_layernorm.weight])[symbol:model.layers.6.input_layernorm.weight] - tensor.CPU.register () -> (%3125:tensor<[2048, 2048], Float32, CPU>[@model.layers.6.self_attn.q_proj.weight][symbol:model.layers.6.self_attn.q_proj.weight])[symbol:model.layers.6.self_attn.q_proj.weight] - tensor.CPU.register () -> (%1798:tensor<[1024, 2048], Float32, 
CPU>[@model.layers.6.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=272), symbol:model.layers.6.self_attn.k_proj.weight])[symbol:model.layers.6.self_attn.k_proj.weight] - tensor.CPU.register () -> (%1047:tensor<[1024, 2048], Float32, CPU>[@model.layers.6.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=274), symbol:model.layers.6.self_attn.v_proj.weight])[symbol:model.layers.6.self_attn.v_proj.weight] - tensor.CPU.register () -> (%7385:tensor<[128], Float32, CPU>[@model.layers.6.self_attn.q_norm.weight][quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=278), symbol:model.layers.6.self_attn.q_norm.weight])[symbol:model.layers.6.self_attn.q_norm.weight] - tensor.CPU.register () -> (%5603:tensor<[128], Float32, CPU>[@model.layers.6.self_attn.k_norm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=280), symbol:model.layers.6.self_attn.k_norm.weight])[symbol:model.layers.6.self_attn.k_norm.weight] - tensor.CPU.register () -> (%6862:tensor<[2048, 2048], Float32, CPU>[@model.layers.6.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=293), symbol:model.layers.6.self_attn.o_proj.weight])[symbol:model.layers.6.self_attn.o_proj.weight] - tensor.CPU.register () -> (%4161:tensor<[2048], Float32, CPU>[@model.layers.6.post_attention_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=296), 
symbol:model.layers.6.post_attention_layernorm.weight])[symbol:model.layers.6.post_attention_layernorm.weight] - tensor.CPU.register () -> (%5295:tensor<[6144, 2048], Float32, CPU>[@model.layers.6.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=297), symbol:model.layers.6.mlp.gate_proj.weight])[symbol:model.layers.6.mlp.gate_proj.weight] - tensor.CPU.register () -> (%4710:tensor<[6144, 2048], Float32, CPU>[@model.layers.6.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=300), symbol:model.layers.6.mlp.up_proj.weight])[symbol:model.layers.6.mlp.up_proj.weight] - tensor.CPU.register () -> (%4929:tensor<[2048, 6144], Float32, CPU>[@model.layers.6.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=302), symbol:model.layers.6.mlp.down_proj.weight])[symbol:model.layers.6.mlp.down_proj.weight] - tensor.CPU.register () -> (%4605:tensor<[2048], Float32, CPU>[@model.layers.7.input_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=305), symbol:model.layers.7.input_layernorm.weight])[symbol:model.layers.7.input_layernorm.weight] - tensor.CPU.register () -> (%4585:tensor<[2048, 2048], Float32, CPU>[@model.layers.7.self_attn.q_proj.weight][symbol:model.layers.7.self_attn.q_proj.weight])[symbol:model.layers.7.self_attn.q_proj.weight] - tensor.CPU.register () -> (%1:tensor<[1024, 2048], Float32, CPU>[@model.layers.7.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: 
Float32), uuid=306), symbol:model.layers.7.self_attn.k_proj.weight])[symbol:model.layers.7.self_attn.k_proj.weight] - tensor.CPU.register () -> (%2341:tensor<[1024, 2048], Float32, CPU>[@model.layers.7.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=308), symbol:model.layers.7.self_attn.v_proj.weight])[symbol:model.layers.7.self_attn.v_proj.weight] - tensor.CPU.register () -> (%5151:tensor<[128], Float32, CPU>[@model.layers.7.self_attn.q_norm.weight][quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=312), symbol:model.layers.7.self_attn.q_norm.weight])[symbol:model.layers.7.self_attn.q_norm.weight] - tensor.CPU.register () -> (%3437:tensor<[128], Float32, CPU>[@model.layers.7.self_attn.k_norm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=314), symbol:model.layers.7.self_attn.k_norm.weight])[symbol:model.layers.7.self_attn.k_norm.weight] - tensor.CPU.register () -> (%3368:tensor<[2048, 2048], Float32, CPU>[@model.layers.7.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=327), symbol:model.layers.7.self_attn.o_proj.weight])[symbol:model.layers.7.self_attn.o_proj.weight] - tensor.CPU.register () -> (%68:tensor<[2048], Float32, CPU>[@model.layers.7.post_attention_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=330), symbol:model.layers.7.post_attention_layernorm.weight])[symbol:model.layers.7.post_attention_layernorm.weight] - tensor.CPU.register () -> (%324:tensor<[6144, 2048], Float32, CPU>[@model.layers.7.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, 
scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=331), symbol:model.layers.7.mlp.gate_proj.weight])[symbol:model.layers.7.mlp.gate_proj.weight] - tensor.CPU.register () -> (%5551:tensor<[6144, 2048], Float32, CPU>[@model.layers.7.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=334), symbol:model.layers.7.mlp.up_proj.weight])[symbol:model.layers.7.mlp.up_proj.weight] - tensor.CPU.register () -> (%7894:tensor<[2048, 6144], Float32, CPU>[@model.layers.7.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=336), symbol:model.layers.7.mlp.down_proj.weight])[symbol:model.layers.7.mlp.down_proj.weight] - tensor.CPU.register () -> (%3851:tensor<[2048], Float32, CPU>[@model.layers.8.input_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=339), symbol:model.layers.8.input_layernorm.weight])[symbol:model.layers.8.input_layernorm.weight] - tensor.CPU.register () -> (%5874:tensor<[2048, 2048], Float32, CPU>[@model.layers.8.self_attn.q_proj.weight][symbol:model.layers.8.self_attn.q_proj.weight])[symbol:model.layers.8.self_attn.q_proj.weight] - tensor.CPU.register () -> (%1863:tensor<[1024, 2048], Float32, CPU>[@model.layers.8.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=340), symbol:model.layers.8.self_attn.k_proj.weight])[symbol:model.layers.8.self_attn.k_proj.weight] - tensor.CPU.register () -> (%3204:tensor<[1024, 2048], Float32, CPU>[@model.layers.8.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, 
ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=342), symbol:model.layers.8.self_attn.v_proj.weight])[symbol:model.layers.8.self_attn.v_proj.weight] - tensor.CPU.register () -> (%2301:tensor<[128], Float32, CPU>[@model.layers.8.self_attn.q_norm.weight][quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=346), symbol:model.layers.8.self_attn.q_norm.weight])[symbol:model.layers.8.self_attn.q_norm.weight] - tensor.CPU.register () -> (%7373:tensor<[128], Float32, CPU>[@model.layers.8.self_attn.k_norm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=348), symbol:model.layers.8.self_attn.k_norm.weight])[symbol:model.layers.8.self_attn.k_norm.weight] - tensor.CPU.register () -> (%6303:tensor<[2048, 2048], Float32, CPU>[@model.layers.8.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=361), symbol:model.layers.8.self_attn.o_proj.weight])[symbol:model.layers.8.self_attn.o_proj.weight] - tensor.CPU.register () -> (%1997:tensor<[2048], Float32, CPU>[@model.layers.8.post_attention_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=364), symbol:model.layers.8.post_attention_layernorm.weight])[symbol:model.layers.8.post_attention_layernorm.weight] - tensor.CPU.register () -> (%6731:tensor<[6144, 2048], Float32, CPU>[@model.layers.8.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=365), symbol:model.layers.8.mlp.gate_proj.weight])[symbol:model.layers.8.mlp.gate_proj.weight] - tensor.CPU.register () -> (%5478:tensor<[6144, 2048], Float32, 
CPU>[@model.layers.8.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=368), symbol:model.layers.8.mlp.up_proj.weight])[symbol:model.layers.8.mlp.up_proj.weight] - tensor.CPU.register () -> (%4734:tensor<[2048, 6144], Float32, CPU>[@model.layers.8.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=370), symbol:model.layers.8.mlp.down_proj.weight])[symbol:model.layers.8.mlp.down_proj.weight] - tensor.CPU.register () -> (%4963:tensor<[2048], Float32, CPU>[@model.layers.9.input_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=373), symbol:model.layers.9.input_layernorm.weight])[symbol:model.layers.9.input_layernorm.weight] - tensor.CPU.register () -> (%137:tensor<[2048, 2048], Float32, CPU>[@model.layers.9.self_attn.q_proj.weight][symbol:model.layers.9.self_attn.q_proj.weight])[symbol:model.layers.9.self_attn.q_proj.weight] - tensor.CPU.register () -> (%2689:tensor<[1024, 2048], Float32, CPU>[@model.layers.9.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=374), symbol:model.layers.9.self_attn.k_proj.weight])[symbol:model.layers.9.self_attn.k_proj.weight] - tensor.CPU.register () -> (%4027:tensor<[1024, 2048], Float32, CPU>[@model.layers.9.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=376), symbol:model.layers.9.self_attn.v_proj.weight])[symbol:model.layers.9.self_attn.v_proj.weight] - tensor.CPU.register () -> (%1375:tensor<[128], 
Float32, CPU>[@model.layers.9.self_attn.q_norm.weight][quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=380), symbol:model.layers.9.self_attn.q_norm.weight])[symbol:model.layers.9.self_attn.q_norm.weight] - tensor.CPU.register () -> (%4962:tensor<[128], Float32, CPU>[@model.layers.9.self_attn.k_norm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=382), symbol:model.layers.9.self_attn.k_norm.weight])[symbol:model.layers.9.self_attn.k_norm.weight] - tensor.CPU.register () -> (%6399:tensor<[2048, 2048], Float32, CPU>[@model.layers.9.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=395), symbol:model.layers.9.self_attn.o_proj.weight])[symbol:model.layers.9.self_attn.o_proj.weight] - tensor.CPU.register () -> (%2594:tensor<[2048], Float32, CPU>[@model.layers.9.post_attention_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=398), symbol:model.layers.9.post_attention_layernorm.weight])[symbol:model.layers.9.post_attention_layernorm.weight] - tensor.CPU.register () -> (%3833:tensor<[6144, 2048], Float32, CPU>[@model.layers.9.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=399), symbol:model.layers.9.mlp.gate_proj.weight])[symbol:model.layers.9.mlp.gate_proj.weight] - tensor.CPU.register () -> (%2358:tensor<[6144, 2048], Float32, CPU>[@model.layers.9.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=402), symbol:model.layers.9.mlp.up_proj.weight])[symbol:model.layers.9.mlp.up_proj.weight] - 
tensor.CPU.register () -> (%3947:tensor<[2048, 6144], Float32, CPU>[@model.layers.9.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=404), symbol:model.layers.9.mlp.down_proj.weight])[symbol:model.layers.9.mlp.down_proj.weight] - tensor.CPU.register () -> (%3229:tensor<[2048], Float32, CPU>[@model.layers.10.input_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=407), symbol:model.layers.10.input_layernorm.weight])[symbol:model.layers.10.input_layernorm.weight] - tensor.CPU.register () -> (%5022:tensor<[2048, 2048], Float32, CPU>[@model.layers.10.self_attn.q_proj.weight][symbol:model.layers.10.self_attn.q_proj.weight])[symbol:model.layers.10.self_attn.q_proj.weight] - tensor.CPU.register () -> (%2867:tensor<[1024, 2048], Float32, CPU>[@model.layers.10.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=408), symbol:model.layers.10.self_attn.k_proj.weight])[symbol:model.layers.10.self_attn.k_proj.weight] - tensor.CPU.register () -> (%567:tensor<[1024, 2048], Float32, CPU>[@model.layers.10.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=410), symbol:model.layers.10.self_attn.v_proj.weight])[symbol:model.layers.10.self_attn.v_proj.weight] - tensor.CPU.register () -> (%7008:tensor<[128], Float32, CPU>[@model.layers.10.self_attn.q_norm.weight][quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=414), symbol:model.layers.10.self_attn.q_norm.weight])[symbol:model.layers.10.self_attn.q_norm.weight] - tensor.CPU.register () -> (%6953:tensor<[128], Float32, 
CPU>[@model.layers.10.self_attn.k_norm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=416), symbol:model.layers.10.self_attn.k_norm.weight])[symbol:model.layers.10.self_attn.k_norm.weight] - tensor.CPU.register () -> (%5479:tensor<[2048, 2048], Float32, CPU>[@model.layers.10.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=429), symbol:model.layers.10.self_attn.o_proj.weight])[symbol:model.layers.10.self_attn.o_proj.weight] - tensor.CPU.register () -> (%3177:tensor<[2048], Float32, CPU>[@model.layers.10.post_attention_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=432), symbol:model.layers.10.post_attention_layernorm.weight])[symbol:model.layers.10.post_attention_layernorm.weight] - tensor.CPU.register () -> (%7857:tensor<[6144, 2048], Float32, CPU>[@model.layers.10.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=433), symbol:model.layers.10.mlp.gate_proj.weight])[symbol:model.layers.10.mlp.gate_proj.weight] - tensor.CPU.register () -> (%3620:tensor<[6144, 2048], Float32, CPU>[@model.layers.10.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=436), symbol:model.layers.10.mlp.up_proj.weight])[symbol:model.layers.10.mlp.up_proj.weight] - tensor.CPU.register () -> (%4172:tensor<[2048, 6144], Float32, CPU>[@model.layers.10.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: 
Float32), uuid=438), symbol:model.layers.10.mlp.down_proj.weight])[symbol:model.layers.10.mlp.down_proj.weight] - tensor.CPU.register () -> (%1820:tensor<[2048], Float32, CPU>[@model.layers.11.input_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=441), symbol:model.layers.11.input_layernorm.weight])[symbol:model.layers.11.input_layernorm.weight] - tensor.CPU.register () -> (%4375:tensor<[2048, 2048], Float32, CPU>[@model.layers.11.self_attn.q_proj.weight][symbol:model.layers.11.self_attn.q_proj.weight])[symbol:model.layers.11.self_attn.q_proj.weight] - tensor.CPU.register () -> (%3805:tensor<[1024, 2048], Float32, CPU>[@model.layers.11.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=442), symbol:model.layers.11.self_attn.k_proj.weight])[symbol:model.layers.11.self_attn.k_proj.weight] - tensor.CPU.register () -> (%5348:tensor<[1024, 2048], Float32, CPU>[@model.layers.11.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=444), symbol:model.layers.11.self_attn.v_proj.weight])[symbol:model.layers.11.self_attn.v_proj.weight] - tensor.CPU.register () -> (%1018:tensor<[128], Float32, CPU>[@model.layers.11.self_attn.q_norm.weight][quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=448), symbol:model.layers.11.self_attn.q_norm.weight])[symbol:model.layers.11.self_attn.q_norm.weight] - tensor.CPU.register () -> (%5323:tensor<[128], Float32, CPU>[@model.layers.11.self_attn.k_norm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=450), symbol:model.layers.11.self_attn.k_norm.weight])[symbol:model.layers.11.self_attn.k_norm.weight] 
- tensor.CPU.register () -> (%6587:tensor<[2048, 2048], Float32, CPU>[@model.layers.11.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=463), symbol:model.layers.11.self_attn.o_proj.weight])[symbol:model.layers.11.self_attn.o_proj.weight] - tensor.CPU.register () -> (%2072:tensor<[2048], Float32, CPU>[@model.layers.11.post_attention_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=466), symbol:model.layers.11.post_attention_layernorm.weight])[symbol:model.layers.11.post_attention_layernorm.weight] - tensor.CPU.register () -> (%5180:tensor<[6144, 2048], Float32, CPU>[@model.layers.11.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=467), symbol:model.layers.11.mlp.gate_proj.weight])[symbol:model.layers.11.mlp.gate_proj.weight] - tensor.CPU.register () -> (%1917:tensor<[6144, 2048], Float32, CPU>[@model.layers.11.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=470), symbol:model.layers.11.mlp.up_proj.weight])[symbol:model.layers.11.mlp.up_proj.weight] - tensor.CPU.register () -> (%2810:tensor<[2048, 6144], Float32, CPU>[@model.layers.11.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=472), symbol:model.layers.11.mlp.down_proj.weight])[symbol:model.layers.11.mlp.down_proj.weight] - tensor.CPU.register () -> (%4945:tensor<[2048], Float32, CPU>[@model.layers.12.input_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=475), symbol:model.layers.12.input_layernorm.weight])[symbol:model.layers.12.input_layernorm.weight] - tensor.CPU.register () -> (%6926:tensor<[2048, 2048], Float32, CPU>[@model.layers.12.self_attn.q_proj.weight][symbol:model.layers.12.self_attn.q_proj.weight])[symbol:model.layers.12.self_attn.q_proj.weight] - tensor.CPU.register () -> (%2741:tensor<[1024, 2048], Float32, CPU>[@model.layers.12.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=476), symbol:model.layers.12.self_attn.k_proj.weight])[symbol:model.layers.12.self_attn.k_proj.weight] - tensor.CPU.register () -> (%3690:tensor<[1024, 2048], Float32, CPU>[@model.layers.12.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=478), symbol:model.layers.12.self_attn.v_proj.weight])[symbol:model.layers.12.self_attn.v_proj.weight] - tensor.CPU.register () -> (%5447:tensor<[128], Float32, CPU>[@model.layers.12.self_attn.q_norm.weight][quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=482), symbol:model.layers.12.self_attn.q_norm.weight])[symbol:model.layers.12.self_attn.q_norm.weight] - tensor.CPU.register () -> (%5437:tensor<[128], Float32, CPU>[@model.layers.12.self_attn.k_norm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=484), symbol:model.layers.12.self_attn.k_norm.weight])[symbol:model.layers.12.self_attn.k_norm.weight] - tensor.CPU.register () -> (%4785:tensor<[2048, 2048], Float32, CPU>[@model.layers.12.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), 
uuid=497), symbol:model.layers.12.self_attn.o_proj.weight])[symbol:model.layers.12.self_attn.o_proj.weight] - tensor.CPU.register () -> (%1343:tensor<[2048], Float32, CPU>[@model.layers.12.post_attention_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=500), symbol:model.layers.12.post_attention_layernorm.weight])[symbol:model.layers.12.post_attention_layernorm.weight] - tensor.CPU.register () -> (%3306:tensor<[6144, 2048], Float32, CPU>[@model.layers.12.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=501), symbol:model.layers.12.mlp.gate_proj.weight])[symbol:model.layers.12.mlp.gate_proj.weight] - tensor.CPU.register () -> (%2123:tensor<[6144, 2048], Float32, CPU>[@model.layers.12.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=504), symbol:model.layers.12.mlp.up_proj.weight])[symbol:model.layers.12.mlp.up_proj.weight] - tensor.CPU.register () -> (%2005:tensor<[2048, 6144], Float32, CPU>[@model.layers.12.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=506), symbol:model.layers.12.mlp.down_proj.weight])[symbol:model.layers.12.mlp.down_proj.weight] - tensor.CPU.register () -> (%1812:tensor<[2048], Float32, CPU>[@model.layers.13.input_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=509), symbol:model.layers.13.input_layernorm.weight])[symbol:model.layers.13.input_layernorm.weight] - tensor.CPU.register () -> (%7043:tensor<[2048, 2048], Float32, 
CPU>[@model.layers.13.self_attn.q_proj.weight][symbol:model.layers.13.self_attn.q_proj.weight])[symbol:model.layers.13.self_attn.q_proj.weight] - tensor.CPU.register () -> (%229:tensor<[1024, 2048], Float32, CPU>[@model.layers.13.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=510), symbol:model.layers.13.self_attn.k_proj.weight])[symbol:model.layers.13.self_attn.k_proj.weight] - tensor.CPU.register () -> (%1019:tensor<[1024, 2048], Float32, CPU>[@model.layers.13.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=512), symbol:model.layers.13.self_attn.v_proj.weight])[symbol:model.layers.13.self_attn.v_proj.weight] - tensor.CPU.register () -> (%3318:tensor<[128], Float32, CPU>[@model.layers.13.self_attn.q_norm.weight][quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=516), symbol:model.layers.13.self_attn.q_norm.weight])[symbol:model.layers.13.self_attn.q_norm.weight] - tensor.CPU.register () -> (%2503:tensor<[128], Float32, CPU>[@model.layers.13.self_attn.k_norm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=518), symbol:model.layers.13.self_attn.k_norm.weight])[symbol:model.layers.13.self_attn.k_norm.weight] - tensor.CPU.register () -> (%3883:tensor<[2048, 2048], Float32, CPU>[@model.layers.13.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=531), symbol:model.layers.13.self_attn.o_proj.weight])[symbol:model.layers.13.self_attn.o_proj.weight] - tensor.CPU.register () -> (%6904:tensor<[2048], Float32, 
CPU>[@model.layers.13.post_attention_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=534), symbol:model.layers.13.post_attention_layernorm.weight])[symbol:model.layers.13.post_attention_layernorm.weight] - tensor.CPU.register () -> (%5444:tensor<[6144, 2048], Float32, CPU>[@model.layers.13.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=535), symbol:model.layers.13.mlp.gate_proj.weight])[symbol:model.layers.13.mlp.gate_proj.weight] - tensor.CPU.register () -> (%3100:tensor<[6144, 2048], Float32, CPU>[@model.layers.13.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=538), symbol:model.layers.13.mlp.up_proj.weight])[symbol:model.layers.13.mlp.up_proj.weight] - tensor.CPU.register () -> (%6631:tensor<[2048, 6144], Float32, CPU>[@model.layers.13.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=540), symbol:model.layers.13.mlp.down_proj.weight])[symbol:model.layers.13.mlp.down_proj.weight] - tensor.CPU.register () -> (%5555:tensor<[2048], Float32, CPU>[@model.layers.14.input_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=543), symbol:model.layers.14.input_layernorm.weight])[symbol:model.layers.14.input_layernorm.weight] - tensor.CPU.register () -> (%1210:tensor<[2048, 2048], Float32, CPU>[@model.layers.14.self_attn.q_proj.weight][symbol:model.layers.14.self_attn.q_proj.weight])[symbol:model.layers.14.self_attn.q_proj.weight] - tensor.CPU.register () -> (%3756:tensor<[1024, 2048], 
Float32, CPU>[@model.layers.14.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=544), symbol:model.layers.14.self_attn.k_proj.weight])[symbol:model.layers.14.self_attn.k_proj.weight] - tensor.CPU.register () -> (%5243:tensor<[1024, 2048], Float32, CPU>[@model.layers.14.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=546), symbol:model.layers.14.self_attn.v_proj.weight])[symbol:model.layers.14.self_attn.v_proj.weight] - tensor.CPU.register () -> (%3796:tensor<[128], Float32, CPU>[@model.layers.14.self_attn.q_norm.weight][quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=550), symbol:model.layers.14.self_attn.q_norm.weight])[symbol:model.layers.14.self_attn.q_norm.weight] - tensor.CPU.register () -> (%3974:tensor<[128], Float32, CPU>[@model.layers.14.self_attn.k_norm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=552), symbol:model.layers.14.self_attn.k_norm.weight])[symbol:model.layers.14.self_attn.k_norm.weight] - tensor.CPU.register () -> (%3797:tensor<[2048, 2048], Float32, CPU>[@model.layers.14.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=565), symbol:model.layers.14.self_attn.o_proj.weight])[symbol:model.layers.14.self_attn.o_proj.weight] - tensor.CPU.register () -> (%4508:tensor<[2048], Float32, CPU>[@model.layers.14.post_attention_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=568), 
symbol:model.layers.14.post_attention_layernorm.weight])[symbol:model.layers.14.post_attention_layernorm.weight] - tensor.CPU.register () -> (%7092:tensor<[6144, 2048], Float32, CPU>[@model.layers.14.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=569), symbol:model.layers.14.mlp.gate_proj.weight])[symbol:model.layers.14.mlp.gate_proj.weight] - tensor.CPU.register () -> (%7164:tensor<[6144, 2048], Float32, CPU>[@model.layers.14.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=572), symbol:model.layers.14.mlp.up_proj.weight])[symbol:model.layers.14.mlp.up_proj.weight] - tensor.CPU.register () -> (%4419:tensor<[2048, 6144], Float32, CPU>[@model.layers.14.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=574), symbol:model.layers.14.mlp.down_proj.weight])[symbol:model.layers.14.mlp.down_proj.weight] - tensor.CPU.register () -> (%5590:tensor<[2048], Float32, CPU>[@model.layers.15.input_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=577), symbol:model.layers.15.input_layernorm.weight])[symbol:model.layers.15.input_layernorm.weight] - tensor.CPU.register () -> (%5843:tensor<[2048, 2048], Float32, CPU>[@model.layers.15.self_attn.q_proj.weight][symbol:model.layers.15.self_attn.q_proj.weight])[symbol:model.layers.15.self_attn.q_proj.weight] - tensor.CPU.register () -> (%938:tensor<[1024, 2048], Float32, CPU>[@model.layers.15.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, 
scale_1_type: Float32), uuid=578), symbol:model.layers.15.self_attn.k_proj.weight])[symbol:model.layers.15.self_attn.k_proj.weight] - tensor.CPU.register () -> (%3967:tensor<[1024, 2048], Float32, CPU>[@model.layers.15.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=580), symbol:model.layers.15.self_attn.v_proj.weight])[symbol:model.layers.15.self_attn.v_proj.weight] - tensor.CPU.register () -> (%3289:tensor<[128], Float32, CPU>[@model.layers.15.self_attn.q_norm.weight][quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=584), symbol:model.layers.15.self_attn.q_norm.weight])[symbol:model.layers.15.self_attn.q_norm.weight] - tensor.CPU.register () -> (%6756:tensor<[128], Float32, CPU>[@model.layers.15.self_attn.k_norm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=586), symbol:model.layers.15.self_attn.k_norm.weight])[symbol:model.layers.15.self_attn.k_norm.weight] - tensor.CPU.register () -> (%4838:tensor<[2048, 2048], Float32, CPU>[@model.layers.15.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=599), symbol:model.layers.15.self_attn.o_proj.weight])[symbol:model.layers.15.self_attn.o_proj.weight] - tensor.CPU.register () -> (%6774:tensor<[2048], Float32, CPU>[@model.layers.15.post_attention_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=602), symbol:model.layers.15.post_attention_layernorm.weight])[symbol:model.layers.15.post_attention_layernorm.weight] - tensor.CPU.register () -> (%2819:tensor<[6144, 2048], Float32, CPU>[@model.layers.15.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, 
quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=603), symbol:model.layers.15.mlp.gate_proj.weight])[symbol:model.layers.15.mlp.gate_proj.weight] - tensor.CPU.register () -> (%1377:tensor<[6144, 2048], Float32, CPU>[@model.layers.15.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=606), symbol:model.layers.15.mlp.up_proj.weight])[symbol:model.layers.15.mlp.up_proj.weight] - tensor.CPU.register () -> (%526:tensor<[2048, 6144], Float32, CPU>[@model.layers.15.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=608), symbol:model.layers.15.mlp.down_proj.weight])[symbol:model.layers.15.mlp.down_proj.weight] - tensor.CPU.register () -> (%369:tensor<[2048], Float32, CPU>[@model.layers.16.input_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=611), symbol:model.layers.16.input_layernorm.weight])[symbol:model.layers.16.input_layernorm.weight] - tensor.CPU.register () -> (%2345:tensor<[2048, 2048], Float32, CPU>[@model.layers.16.self_attn.q_proj.weight][symbol:model.layers.16.self_attn.q_proj.weight])[symbol:model.layers.16.self_attn.q_proj.weight] - tensor.CPU.register () -> (%3022:tensor<[1024, 2048], Float32, CPU>[@model.layers.16.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=612), symbol:model.layers.16.self_attn.k_proj.weight])[symbol:model.layers.16.self_attn.k_proj.weight] - tensor.CPU.register () -> (%2931:tensor<[1024, 2048], Float32, 
CPU>[@model.layers.16.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=614), symbol:model.layers.16.self_attn.v_proj.weight])[symbol:model.layers.16.self_attn.v_proj.weight] - tensor.CPU.register () -> (%1150:tensor<[128], Float32, CPU>[@model.layers.16.self_attn.q_norm.weight][quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=618), symbol:model.layers.16.self_attn.q_norm.weight])[symbol:model.layers.16.self_attn.q_norm.weight] - tensor.CPU.register () -> (%5521:tensor<[128], Float32, CPU>[@model.layers.16.self_attn.k_norm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=620), symbol:model.layers.16.self_attn.k_norm.weight])[symbol:model.layers.16.self_attn.k_norm.weight] - tensor.CPU.register () -> (%672:tensor<[2048, 2048], Float32, CPU>[@model.layers.16.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=633), symbol:model.layers.16.self_attn.o_proj.weight])[symbol:model.layers.16.self_attn.o_proj.weight] - tensor.CPU.register () -> (%6793:tensor<[2048], Float32, CPU>[@model.layers.16.post_attention_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=636), symbol:model.layers.16.post_attention_layernorm.weight])[symbol:model.layers.16.post_attention_layernorm.weight] - tensor.CPU.register () -> (%993:tensor<[6144, 2048], Float32, CPU>[@model.layers.16.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=637), 
symbol:model.layers.16.mlp.gate_proj.weight])[symbol:model.layers.16.mlp.gate_proj.weight] - tensor.CPU.register () -> (%226:tensor<[6144, 2048], Float32, CPU>[@model.layers.16.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=640), symbol:model.layers.16.mlp.up_proj.weight])[symbol:model.layers.16.mlp.up_proj.weight] - tensor.CPU.register () -> (%7287:tensor<[2048, 6144], Float32, CPU>[@model.layers.16.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=642), symbol:model.layers.16.mlp.down_proj.weight])[symbol:model.layers.16.mlp.down_proj.weight] - tensor.CPU.register () -> (%7811:tensor<[2048], Float32, CPU>[@model.layers.17.input_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=645), symbol:model.layers.17.input_layernorm.weight])[symbol:model.layers.17.input_layernorm.weight] - tensor.CPU.register () -> (%5758:tensor<[2048, 2048], Float32, CPU>[@model.layers.17.self_attn.q_proj.weight][symbol:model.layers.17.self_attn.q_proj.weight])[symbol:model.layers.17.self_attn.q_proj.weight] - tensor.CPU.register () -> (%2828:tensor<[1024, 2048], Float32, CPU>[@model.layers.17.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=646), symbol:model.layers.17.self_attn.k_proj.weight])[symbol:model.layers.17.self_attn.k_proj.weight] - tensor.CPU.register () -> (%417:tensor<[1024, 2048], Float32, CPU>[@model.layers.17.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: 
Float32), uuid=648), symbol:model.layers.17.self_attn.v_proj.weight])[symbol:model.layers.17.self_attn.v_proj.weight] - tensor.CPU.register () -> (%59:tensor<[128], Float32, CPU>[@model.layers.17.self_attn.q_norm.weight][quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=652), symbol:model.layers.17.self_attn.q_norm.weight])[symbol:model.layers.17.self_attn.q_norm.weight] - tensor.CPU.register () -> (%7588:tensor<[128], Float32, CPU>[@model.layers.17.self_attn.k_norm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=654), symbol:model.layers.17.self_attn.k_norm.weight])[symbol:model.layers.17.self_attn.k_norm.weight] - tensor.CPU.register () -> (%5285:tensor<[2048, 2048], Float32, CPU>[@model.layers.17.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=667), symbol:model.layers.17.self_attn.o_proj.weight])[symbol:model.layers.17.self_attn.o_proj.weight] - tensor.CPU.register () -> (%3787:tensor<[2048], Float32, CPU>[@model.layers.17.post_attention_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=670), symbol:model.layers.17.post_attention_layernorm.weight])[symbol:model.layers.17.post_attention_layernorm.weight] - tensor.CPU.register () -> (%4841:tensor<[6144, 2048], Float32, CPU>[@model.layers.17.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=671), symbol:model.layers.17.mlp.gate_proj.weight])[symbol:model.layers.17.mlp.gate_proj.weight] - tensor.CPU.register () -> (%4784:tensor<[6144, 2048], Float32, CPU>[@model.layers.17.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, 
ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=674), symbol:model.layers.17.mlp.up_proj.weight])[symbol:model.layers.17.mlp.up_proj.weight] - tensor.CPU.register () -> (%1908:tensor<[2048, 6144], Float32, CPU>[@model.layers.17.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=676), symbol:model.layers.17.mlp.down_proj.weight])[symbol:model.layers.17.mlp.down_proj.weight] - tensor.CPU.register () -> (%310:tensor<[2048], Float32, CPU>[@model.layers.18.input_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=679), symbol:model.layers.18.input_layernorm.weight])[symbol:model.layers.18.input_layernorm.weight] - tensor.CPU.register () -> (%7352:tensor<[2048, 2048], Float32, CPU>[@model.layers.18.self_attn.q_proj.weight][symbol:model.layers.18.self_attn.q_proj.weight])[symbol:model.layers.18.self_attn.q_proj.weight] - tensor.CPU.register () -> (%6436:tensor<[1024, 2048], Float32, CPU>[@model.layers.18.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=680), symbol:model.layers.18.self_attn.k_proj.weight])[symbol:model.layers.18.self_attn.k_proj.weight] - tensor.CPU.register () -> (%6164:tensor<[1024, 2048], Float32, CPU>[@model.layers.18.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=682), symbol:model.layers.18.self_attn.v_proj.weight])[symbol:model.layers.18.self_attn.v_proj.weight] - tensor.CPU.register () -> (%2747:tensor<[128], Float32, CPU>[@model.layers.18.self_attn.q_norm.weight][quant_recipe:QuantSpec(Raw(type: 
Int16PerTensor), uuid=686), symbol:model.layers.18.self_attn.q_norm.weight])[symbol:model.layers.18.self_attn.q_norm.weight] - tensor.CPU.register () -> (%5281:tensor<[128], Float32, CPU>[@model.layers.18.self_attn.k_norm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=688), symbol:model.layers.18.self_attn.k_norm.weight])[symbol:model.layers.18.self_attn.k_norm.weight] - tensor.CPU.register () -> (%7646:tensor<[2048, 2048], Float32, CPU>[@model.layers.18.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=701), symbol:model.layers.18.self_attn.o_proj.weight])[symbol:model.layers.18.self_attn.o_proj.weight] - tensor.CPU.register () -> (%2540:tensor<[2048], Float32, CPU>[@model.layers.18.post_attention_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=704), symbol:model.layers.18.post_attention_layernorm.weight])[symbol:model.layers.18.post_attention_layernorm.weight] - tensor.CPU.register () -> (%6101:tensor<[6144, 2048], Float32, CPU>[@model.layers.18.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=705), symbol:model.layers.18.mlp.gate_proj.weight])[symbol:model.layers.18.mlp.gate_proj.weight] - tensor.CPU.register () -> (%2195:tensor<[6144, 2048], Float32, CPU>[@model.layers.18.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=708), symbol:model.layers.18.mlp.up_proj.weight])[symbol:model.layers.18.mlp.up_proj.weight] - tensor.CPU.register () -> (%3651:tensor<[2048, 6144], Float32, 
CPU>[@model.layers.18.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=710), symbol:model.layers.18.mlp.down_proj.weight])[symbol:model.layers.18.mlp.down_proj.weight] - tensor.CPU.register () -> (%3722:tensor<[2048], Float32, CPU>[@model.layers.19.input_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=713), symbol:model.layers.19.input_layernorm.weight])[symbol:model.layers.19.input_layernorm.weight] - tensor.CPU.register () -> (%1141:tensor<[2048, 2048], Float32, CPU>[@model.layers.19.self_attn.q_proj.weight][symbol:model.layers.19.self_attn.q_proj.weight])[symbol:model.layers.19.self_attn.q_proj.weight] - tensor.CPU.register () -> (%651:tensor<[1024, 2048], Float32, CPU>[@model.layers.19.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=714), symbol:model.layers.19.self_attn.k_proj.weight])[symbol:model.layers.19.self_attn.k_proj.weight] - tensor.CPU.register () -> (%254:tensor<[1024, 2048], Float32, CPU>[@model.layers.19.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=716), symbol:model.layers.19.self_attn.v_proj.weight])[symbol:model.layers.19.self_attn.v_proj.weight] - tensor.CPU.register () -> (%610:tensor<[128], Float32, CPU>[@model.layers.19.self_attn.q_norm.weight][quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=720), symbol:model.layers.19.self_attn.q_norm.weight])[symbol:model.layers.19.self_attn.q_norm.weight] - tensor.CPU.register () -> (%3691:tensor<[128], Float32, 
CPU>[@model.layers.19.self_attn.k_norm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=722), symbol:model.layers.19.self_attn.k_norm.weight])[symbol:model.layers.19.self_attn.k_norm.weight] - tensor.CPU.register () -> (%7002:tensor<[2048, 2048], Float32, CPU>[@model.layers.19.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=735), symbol:model.layers.19.self_attn.o_proj.weight])[symbol:model.layers.19.self_attn.o_proj.weight] - tensor.CPU.register () -> (%3446:tensor<[2048], Float32, CPU>[@model.layers.19.post_attention_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=738), symbol:model.layers.19.post_attention_layernorm.weight])[symbol:model.layers.19.post_attention_layernorm.weight] - tensor.CPU.register () -> (%2118:tensor<[6144, 2048], Float32, CPU>[@model.layers.19.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=739), symbol:model.layers.19.mlp.gate_proj.weight])[symbol:model.layers.19.mlp.gate_proj.weight] - tensor.CPU.register () -> (%283:tensor<[6144, 2048], Float32, CPU>[@model.layers.19.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=742), symbol:model.layers.19.mlp.up_proj.weight])[symbol:model.layers.19.mlp.up_proj.weight] - tensor.CPU.register () -> (%1264:tensor<[2048, 6144], Float32, CPU>[@model.layers.19.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: 
Float32), uuid=744), symbol:model.layers.19.mlp.down_proj.weight])[symbol:model.layers.19.mlp.down_proj.weight] - tensor.CPU.register () -> (%5183:tensor<[2048], Float32, CPU>[@model.layers.20.input_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=747), symbol:model.layers.20.input_layernorm.weight])[symbol:model.layers.20.input_layernorm.weight] - tensor.CPU.register () -> (%6004:tensor<[2048, 2048], Float32, CPU>[@model.layers.20.self_attn.q_proj.weight][symbol:model.layers.20.self_attn.q_proj.weight])[symbol:model.layers.20.self_attn.q_proj.weight] - tensor.CPU.register () -> (%4764:tensor<[1024, 2048], Float32, CPU>[@model.layers.20.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=748), symbol:model.layers.20.self_attn.k_proj.weight])[symbol:model.layers.20.self_attn.k_proj.weight] - tensor.CPU.register () -> (%3516:tensor<[1024, 2048], Float32, CPU>[@model.layers.20.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=750), symbol:model.layers.20.self_attn.v_proj.weight])[symbol:model.layers.20.self_attn.v_proj.weight] - tensor.CPU.register () -> (%2042:tensor<[128], Float32, CPU>[@model.layers.20.self_attn.q_norm.weight][quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=754), symbol:model.layers.20.self_attn.q_norm.weight])[symbol:model.layers.20.self_attn.q_norm.weight] - tensor.CPU.register () -> (%1646:tensor<[128], Float32, CPU>[@model.layers.20.self_attn.k_norm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=756), symbol:model.layers.20.self_attn.k_norm.weight])[symbol:model.layers.20.self_attn.k_norm.weight] 
- tensor.CPU.register () -> (%3587:tensor<[2048, 2048], Float32, CPU>[@model.layers.20.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=769), symbol:model.layers.20.self_attn.o_proj.weight])[symbol:model.layers.20.self_attn.o_proj.weight] - tensor.CPU.register () -> (%2726:tensor<[2048], Float32, CPU>[@model.layers.20.post_attention_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=772), symbol:model.layers.20.post_attention_layernorm.weight])[symbol:model.layers.20.post_attention_layernorm.weight] - tensor.CPU.register () -> (%3656:tensor<[6144, 2048], Float32, CPU>[@model.layers.20.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=773), symbol:model.layers.20.mlp.gate_proj.weight])[symbol:model.layers.20.mlp.gate_proj.weight] - tensor.CPU.register () -> (%802:tensor<[6144, 2048], Float32, CPU>[@model.layers.20.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=776), symbol:model.layers.20.mlp.up_proj.weight])[symbol:model.layers.20.mlp.up_proj.weight] - tensor.CPU.register () -> (%62:tensor<[2048, 6144], Float32, CPU>[@model.layers.20.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=778), symbol:model.layers.20.mlp.down_proj.weight])[symbol:model.layers.20.mlp.down_proj.weight] - tensor.CPU.register () -> (%1237:tensor<[2048], Float32, CPU>[@model.layers.21.input_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=781), symbol:model.layers.21.input_layernorm.weight])[symbol:model.layers.21.input_layernorm.weight] - tensor.CPU.register () -> (%2397:tensor<[2048, 2048], Float32, CPU>[@model.layers.21.self_attn.q_proj.weight][symbol:model.layers.21.self_attn.q_proj.weight])[symbol:model.layers.21.self_attn.q_proj.weight] - tensor.CPU.register () -> (%7562:tensor<[1024, 2048], Float32, CPU>[@model.layers.21.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=782), symbol:model.layers.21.self_attn.k_proj.weight])[symbol:model.layers.21.self_attn.k_proj.weight] - tensor.CPU.register () -> (%4665:tensor<[1024, 2048], Float32, CPU>[@model.layers.21.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=784), symbol:model.layers.21.self_attn.v_proj.weight])[symbol:model.layers.21.self_attn.v_proj.weight] - tensor.CPU.register () -> (%6195:tensor<[128], Float32, CPU>[@model.layers.21.self_attn.q_norm.weight][quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=788), symbol:model.layers.21.self_attn.q_norm.weight])[symbol:model.layers.21.self_attn.q_norm.weight] - tensor.CPU.register () -> (%701:tensor<[128], Float32, CPU>[@model.layers.21.self_attn.k_norm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=790), symbol:model.layers.21.self_attn.k_norm.weight])[symbol:model.layers.21.self_attn.k_norm.weight] - tensor.CPU.register () -> (%5913:tensor<[2048, 2048], Float32, CPU>[@model.layers.21.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), 
uuid=803), symbol:model.layers.21.self_attn.o_proj.weight])[symbol:model.layers.21.self_attn.o_proj.weight] - tensor.CPU.register () -> (%4765:tensor<[2048], Float32, CPU>[@model.layers.21.post_attention_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=806), symbol:model.layers.21.post_attention_layernorm.weight])[symbol:model.layers.21.post_attention_layernorm.weight] - tensor.CPU.register () -> (%864:tensor<[6144, 2048], Float32, CPU>[@model.layers.21.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=807), symbol:model.layers.21.mlp.gate_proj.weight])[symbol:model.layers.21.mlp.gate_proj.weight] - tensor.CPU.register () -> (%923:tensor<[6144, 2048], Float32, CPU>[@model.layers.21.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=810), symbol:model.layers.21.mlp.up_proj.weight])[symbol:model.layers.21.mlp.up_proj.weight] - tensor.CPU.register () -> (%6934:tensor<[2048, 6144], Float32, CPU>[@model.layers.21.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=812), symbol:model.layers.21.mlp.down_proj.weight])[symbol:model.layers.21.mlp.down_proj.weight] - tensor.CPU.register () -> (%425:tensor<[2048], Float32, CPU>[@model.layers.22.input_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=815), symbol:model.layers.22.input_layernorm.weight])[symbol:model.layers.22.input_layernorm.weight] - tensor.CPU.register () -> (%1036:tensor<[2048, 2048], Float32, 
CPU>[@model.layers.22.self_attn.q_proj.weight][symbol:model.layers.22.self_attn.q_proj.weight])[symbol:model.layers.22.self_attn.q_proj.weight] - tensor.CPU.register () -> (%6990:tensor<[1024, 2048], Float32, CPU>[@model.layers.22.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=816), symbol:model.layers.22.self_attn.k_proj.weight])[symbol:model.layers.22.self_attn.k_proj.weight] - tensor.CPU.register () -> (%2703:tensor<[1024, 2048], Float32, CPU>[@model.layers.22.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=818), symbol:model.layers.22.self_attn.v_proj.weight])[symbol:model.layers.22.self_attn.v_proj.weight] - tensor.CPU.register () -> (%1995:tensor<[128], Float32, CPU>[@model.layers.22.self_attn.q_norm.weight][quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=822), symbol:model.layers.22.self_attn.q_norm.weight])[symbol:model.layers.22.self_attn.q_norm.weight] - tensor.CPU.register () -> (%2702:tensor<[128], Float32, CPU>[@model.layers.22.self_attn.k_norm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=824), symbol:model.layers.22.self_attn.k_norm.weight])[symbol:model.layers.22.self_attn.k_norm.weight] - tensor.CPU.register () -> (%2221:tensor<[2048, 2048], Float32, CPU>[@model.layers.22.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=837), symbol:model.layers.22.self_attn.o_proj.weight])[symbol:model.layers.22.self_attn.o_proj.weight] - tensor.CPU.register () -> (%5286:tensor<[2048], Float32, 
CPU>[@model.layers.22.post_attention_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=840), symbol:model.layers.22.post_attention_layernorm.weight])[symbol:model.layers.22.post_attention_layernorm.weight] - tensor.CPU.register () -> (%7377:tensor<[6144, 2048], Float32, CPU>[@model.layers.22.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=841), symbol:model.layers.22.mlp.gate_proj.weight])[symbol:model.layers.22.mlp.gate_proj.weight] - tensor.CPU.register () -> (%694:tensor<[6144, 2048], Float32, CPU>[@model.layers.22.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=844), symbol:model.layers.22.mlp.up_proj.weight])[symbol:model.layers.22.mlp.up_proj.weight] - tensor.CPU.register () -> (%1401:tensor<[2048, 6144], Float32, CPU>[@model.layers.22.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=846), symbol:model.layers.22.mlp.down_proj.weight])[symbol:model.layers.22.mlp.down_proj.weight] - tensor.CPU.register () -> (%809:tensor<[2048], Float32, CPU>[@model.layers.23.input_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=849), symbol:model.layers.23.input_layernorm.weight])[symbol:model.layers.23.input_layernorm.weight] - tensor.CPU.register () -> (%2936:tensor<[2048, 2048], Float32, CPU>[@model.layers.23.self_attn.q_proj.weight][symbol:model.layers.23.self_attn.q_proj.weight])[symbol:model.layers.23.self_attn.q_proj.weight] - tensor.CPU.register () -> (%577:tensor<[1024, 2048], Float32, 
CPU>[@model.layers.23.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=850), symbol:model.layers.23.self_attn.k_proj.weight])[symbol:model.layers.23.self_attn.k_proj.weight] - tensor.CPU.register () -> (%5308:tensor<[1024, 2048], Float32, CPU>[@model.layers.23.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=852), symbol:model.layers.23.self_attn.v_proj.weight])[symbol:model.layers.23.self_attn.v_proj.weight] - tensor.CPU.register () -> (%5454:tensor<[128], Float32, CPU>[@model.layers.23.self_attn.q_norm.weight][quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=856), symbol:model.layers.23.self_attn.q_norm.weight])[symbol:model.layers.23.self_attn.q_norm.weight] - tensor.CPU.register () -> (%1089:tensor<[128], Float32, CPU>[@model.layers.23.self_attn.k_norm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=858), symbol:model.layers.23.self_attn.k_norm.weight])[symbol:model.layers.23.self_attn.k_norm.weight] - tensor.CPU.register () -> (%4076:tensor<[2048, 2048], Float32, CPU>[@model.layers.23.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=871), symbol:model.layers.23.self_attn.o_proj.weight])[symbol:model.layers.23.self_attn.o_proj.weight] - tensor.CPU.register () -> (%4535:tensor<[2048], Float32, CPU>[@model.layers.23.post_attention_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=874), 
symbol:model.layers.23.post_attention_layernorm.weight])[symbol:model.layers.23.post_attention_layernorm.weight] - tensor.CPU.register () -> (%7750:tensor<[6144, 2048], Float32, CPU>[@model.layers.23.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=875), symbol:model.layers.23.mlp.gate_proj.weight])[symbol:model.layers.23.mlp.gate_proj.weight] - tensor.CPU.register () -> (%4744:tensor<[6144, 2048], Float32, CPU>[@model.layers.23.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=878), symbol:model.layers.23.mlp.up_proj.weight])[symbol:model.layers.23.mlp.up_proj.weight] - tensor.CPU.register () -> (%2933:tensor<[2048, 6144], Float32, CPU>[@model.layers.23.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=880), symbol:model.layers.23.mlp.down_proj.weight])[symbol:model.layers.23.mlp.down_proj.weight] - tensor.CPU.register () -> (%1154:tensor<[2048], Float32, CPU>[@model.layers.24.input_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=883), symbol:model.layers.24.input_layernorm.weight])[symbol:model.layers.24.input_layernorm.weight] - tensor.CPU.register () -> (%2384:tensor<[2048, 2048], Float32, CPU>[@model.layers.24.self_attn.q_proj.weight][symbol:model.layers.24.self_attn.q_proj.weight])[symbol:model.layers.24.self_attn.q_proj.weight] - tensor.CPU.register () -> (%2620:tensor<[1024, 2048], Float32, CPU>[@model.layers.24.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: 
UInt4, scale_1_type: Float32), uuid=884), symbol:model.layers.24.self_attn.k_proj.weight])[symbol:model.layers.24.self_attn.k_proj.weight] - tensor.CPU.register () -> (%3265:tensor<[1024, 2048], Float32, CPU>[@model.layers.24.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=886), symbol:model.layers.24.self_attn.v_proj.weight])[symbol:model.layers.24.self_attn.v_proj.weight] - tensor.CPU.register () -> (%2985:tensor<[128], Float32, CPU>[@model.layers.24.self_attn.q_norm.weight][quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=890), symbol:model.layers.24.self_attn.q_norm.weight])[symbol:model.layers.24.self_attn.q_norm.weight] - tensor.CPU.register () -> (%3894:tensor<[128], Float32, CPU>[@model.layers.24.self_attn.k_norm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=892), symbol:model.layers.24.self_attn.k_norm.weight])[symbol:model.layers.24.self_attn.k_norm.weight] - tensor.CPU.register () -> (%7488:tensor<[2048, 2048], Float32, CPU>[@model.layers.24.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=905), symbol:model.layers.24.self_attn.o_proj.weight])[symbol:model.layers.24.self_attn.o_proj.weight] - tensor.CPU.register () -> (%6713:tensor<[2048], Float32, CPU>[@model.layers.24.post_attention_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=908), symbol:model.layers.24.post_attention_layernorm.weight])[symbol:model.layers.24.post_attention_layernorm.weight] - tensor.CPU.register () -> (%1336:tensor<[6144, 2048], Float32, CPU>[@model.layers.24.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, 
quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=909), symbol:model.layers.24.mlp.gate_proj.weight])[symbol:model.layers.24.mlp.gate_proj.weight] - tensor.CPU.register () -> (%7035:tensor<[6144, 2048], Float32, CPU>[@model.layers.24.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=912), symbol:model.layers.24.mlp.up_proj.weight])[symbol:model.layers.24.mlp.up_proj.weight] - tensor.CPU.register () -> (%7069:tensor<[2048, 6144], Float32, CPU>[@model.layers.24.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=914), symbol:model.layers.24.mlp.down_proj.weight])[symbol:model.layers.24.mlp.down_proj.weight] - tensor.CPU.register () -> (%6496:tensor<[2048], Float32, CPU>[@model.layers.25.input_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=917), symbol:model.layers.25.input_layernorm.weight])[symbol:model.layers.25.input_layernorm.weight] - tensor.CPU.register () -> (%1852:tensor<[2048, 2048], Float32, CPU>[@model.layers.25.self_attn.q_proj.weight][symbol:model.layers.25.self_attn.q_proj.weight])[symbol:model.layers.25.self_attn.q_proj.weight] - tensor.CPU.register () -> (%3615:tensor<[1024, 2048], Float32, CPU>[@model.layers.25.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=918), symbol:model.layers.25.self_attn.k_proj.weight])[symbol:model.layers.25.self_attn.k_proj.weight] - tensor.CPU.register () -> (%2014:tensor<[1024, 2048], Float32, 
CPU>[@model.layers.25.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=920), symbol:model.layers.25.self_attn.v_proj.weight])[symbol:model.layers.25.self_attn.v_proj.weight] - tensor.CPU.register () -> (%2021:tensor<[128], Float32, CPU>[@model.layers.25.self_attn.q_norm.weight][quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=924), symbol:model.layers.25.self_attn.q_norm.weight])[symbol:model.layers.25.self_attn.q_norm.weight] - tensor.CPU.register () -> (%1413:tensor<[128], Float32, CPU>[@model.layers.25.self_attn.k_norm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=926), symbol:model.layers.25.self_attn.k_norm.weight])[symbol:model.layers.25.self_attn.k_norm.weight] - tensor.CPU.register () -> (%7074:tensor<[2048, 2048], Float32, CPU>[@model.layers.25.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=939), symbol:model.layers.25.self_attn.o_proj.weight])[symbol:model.layers.25.self_attn.o_proj.weight] - tensor.CPU.register () -> (%6424:tensor<[2048], Float32, CPU>[@model.layers.25.post_attention_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=942), symbol:model.layers.25.post_attention_layernorm.weight])[symbol:model.layers.25.post_attention_layernorm.weight] - tensor.CPU.register () -> (%1860:tensor<[6144, 2048], Float32, CPU>[@model.layers.25.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=943), 
symbol:model.layers.25.mlp.gate_proj.weight])[symbol:model.layers.25.mlp.gate_proj.weight] - tensor.CPU.register () -> (%5840:tensor<[6144, 2048], Float32, CPU>[@model.layers.25.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=946), symbol:model.layers.25.mlp.up_proj.weight])[symbol:model.layers.25.mlp.up_proj.weight] - tensor.CPU.register () -> (%6869:tensor<[2048, 6144], Float32, CPU>[@model.layers.25.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=948), symbol:model.layers.25.mlp.down_proj.weight])[symbol:model.layers.25.mlp.down_proj.weight] - tensor.CPU.register () -> (%611:tensor<[2048], Float32, CPU>[@model.layers.26.input_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=951), symbol:model.layers.26.input_layernorm.weight])[symbol:model.layers.26.input_layernorm.weight] - tensor.CPU.register () -> (%1040:tensor<[2048, 2048], Float32, CPU>[@model.layers.26.self_attn.q_proj.weight][symbol:model.layers.26.self_attn.q_proj.weight])[symbol:model.layers.26.self_attn.q_proj.weight] - tensor.CPU.register () -> (%2312:tensor<[1024, 2048], Float32, CPU>[@model.layers.26.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=952), symbol:model.layers.26.self_attn.k_proj.weight])[symbol:model.layers.26.self_attn.k_proj.weight] - tensor.CPU.register () -> (%174:tensor<[1024, 2048], Float32, CPU>[@model.layers.26.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: 
Float32), uuid=954), symbol:model.layers.26.self_attn.v_proj.weight])[symbol:model.layers.26.self_attn.v_proj.weight] - tensor.CPU.register () -> (%2799:tensor<[128], Float32, CPU>[@model.layers.26.self_attn.q_norm.weight][quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=958), symbol:model.layers.26.self_attn.q_norm.weight])[symbol:model.layers.26.self_attn.q_norm.weight] - tensor.CPU.register () -> (%6479:tensor<[128], Float32, CPU>[@model.layers.26.self_attn.k_norm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=960), symbol:model.layers.26.self_attn.k_norm.weight])[symbol:model.layers.26.self_attn.k_norm.weight] - tensor.CPU.register () -> (%504:tensor<[2048, 2048], Float32, CPU>[@model.layers.26.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=973), symbol:model.layers.26.self_attn.o_proj.weight])[symbol:model.layers.26.self_attn.o_proj.weight] - tensor.CPU.register () -> (%5096:tensor<[2048], Float32, CPU>[@model.layers.26.post_attention_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=976), symbol:model.layers.26.post_attention_layernorm.weight])[symbol:model.layers.26.post_attention_layernorm.weight] - tensor.CPU.register () -> (%4867:tensor<[6144, 2048], Float32, CPU>[@model.layers.26.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=977), symbol:model.layers.26.mlp.gate_proj.weight])[symbol:model.layers.26.mlp.gate_proj.weight] - tensor.CPU.register () -> (%2619:tensor<[6144, 2048], Float32, CPU>[@model.layers.26.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, 
ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=980), symbol:model.layers.26.mlp.up_proj.weight])[symbol:model.layers.26.mlp.up_proj.weight] - tensor.CPU.register () -> (%1355:tensor<[2048, 6144], Float32, CPU>[@model.layers.26.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=982), symbol:model.layers.26.mlp.down_proj.weight])[symbol:model.layers.26.mlp.down_proj.weight] - tensor.CPU.register () -> (%6381:tensor<[2048], Float32, CPU>[@model.layers.27.input_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=985), symbol:model.layers.27.input_layernorm.weight])[symbol:model.layers.27.input_layernorm.weight] - tensor.CPU.register () -> (%5946:tensor<[2048, 2048], Float32, CPU>[@model.layers.27.self_attn.q_proj.weight][symbol:model.layers.27.self_attn.q_proj.weight])[symbol:model.layers.27.self_attn.q_proj.weight] - tensor.CPU.register () -> (%1802:tensor<[1024, 2048], Float32, CPU>[@model.layers.27.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=986), symbol:model.layers.27.self_attn.k_proj.weight])[symbol:model.layers.27.self_attn.k_proj.weight] - tensor.CPU.register () -> (%6652:tensor<[1024, 2048], Float32, CPU>[@model.layers.27.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=988), symbol:model.layers.27.self_attn.v_proj.weight])[symbol:model.layers.27.self_attn.v_proj.weight] - tensor.CPU.register () -> (%6206:tensor<[128], Float32, CPU>[@model.layers.27.self_attn.q_norm.weight][quant_recipe:QuantSpec(Raw(type: 
Int16PerTensor), uuid=992), symbol:model.layers.27.self_attn.q_norm.weight])[symbol:model.layers.27.self_attn.q_norm.weight] - tensor.CPU.register () -> (%1743:tensor<[128], Float32, CPU>[@model.layers.27.self_attn.k_norm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=994), symbol:model.layers.27.self_attn.k_norm.weight])[symbol:model.layers.27.self_attn.k_norm.weight] - tensor.CPU.register () -> (%5189:tensor<[2048, 2048], Float32, CPU>[@model.layers.27.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1007), symbol:model.layers.27.self_attn.o_proj.weight])[symbol:model.layers.27.self_attn.o_proj.weight] - tensor.CPU.register () -> (%3001:tensor<[2048], Float32, CPU>[@model.layers.27.post_attention_layernorm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1010), symbol:model.layers.27.post_attention_layernorm.weight])[symbol:model.layers.27.post_attention_layernorm.weight] - tensor.CPU.register () -> (%5561:tensor<[6144, 2048], Float32, CPU>[@model.layers.27.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1011), symbol:model.layers.27.mlp.gate_proj.weight])[symbol:model.layers.27.mlp.gate_proj.weight] - tensor.CPU.register () -> (%2731:tensor<[6144, 2048], Float32, CPU>[@model.layers.27.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1014), symbol:model.layers.27.mlp.up_proj.weight])[symbol:model.layers.27.mlp.up_proj.weight] - tensor.CPU.register () -> (%3783:tensor<[2048, 6144], Float32, 
CPU>[@model.layers.27.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1016), symbol:model.layers.27.mlp.down_proj.weight])[symbol:model.layers.27.mlp.down_proj.weight] - tensor.CPU.register () -> (%5765:tensor<[2048], Float32, CPU>[@model.norm.weight][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1019), symbol:model.norm.weight])[symbol:model.norm.weight] - tensor.CPU.register () -> (%6130:tensor<[151936, 2048], Float32, CPU>[@lm_head.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1020), symbol:lm_head.weight])[symbol:lm_head.weight] + tensor.CPU.register () -> (%361:tensor<[151936, 2048], Float32, CPU>[@model.embed_tokens.weight][quant_recipe:QuantSpec(Raw(type: Float32), uuid=61), symbol:model.embed_tokens.weight])[symbol:model.embed_tokens.weight] + tensor.CPU.register () -> (%8204:tensor<[1024, 5, 128], UInt16PerTensor, CPU>[@rope_sin][symbol:rope_sin])[symbol:rope_sin] + tensor.CPU.register () -> (%8205:tensor<[1024, 5, 128], UInt16PerTensor, CPU>[@rope_cos][symbol:rope_cos])[symbol:rope_cos] + tensor.CPU.register () -> (%4256:tensor<[2048], Float32, CPU>[@model.layers.0.input_layernorm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=67), symbol:model.layers.0.input_layernorm.weight])[symbol:model.layers.0.input_layernorm.weight] + tensor.CPU.register () -> (%6100:tensor<[2048, 2048], Float32, CPU>[@model.layers.0.self_attn.q_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=68), 
symbol:model.layers.0.self_attn.q_proj.weight])[symbol:model.layers.0.self_attn.q_proj.weight] + tensor.CPU.register () -> (%326:tensor<[1024, 2048], Float32, CPU>[@model.layers.0.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=70), symbol:model.layers.0.self_attn.k_proj.weight])[symbol:model.layers.0.self_attn.k_proj.weight] + tensor.CPU.register () -> (%4416:tensor<[1024, 2048], Float32, CPU>[@model.layers.0.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=72), symbol:model.layers.0.self_attn.v_proj.weight])[symbol:model.layers.0.self_attn.v_proj.weight] + tensor.CPU.register () -> (%7842:tensor<[128], Float32, CPU>[@model.layers.0.self_attn.q_norm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75), symbol:model.layers.0.self_attn.q_norm.weight])[symbol:model.layers.0.self_attn.q_norm.weight] + tensor.CPU.register () -> (%8182:tensor<[128], Float32, CPU>[@model.layers.0.self_attn.k_norm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77), symbol:model.layers.0.self_attn.k_norm.weight])[symbol:model.layers.0.self_attn.k_norm.weight] + tensor.CPU.register () -> (%7659:tensor<[2048, 2048], Float32, CPU>[@model.layers.0.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=90), symbol:model.layers.0.self_attn.o_proj.weight])[symbol:model.layers.0.self_attn.o_proj.weight] + tensor.CPU.register () -> (%875:tensor<[2048], Float32, 
CPU>[@model.layers.0.post_attention_layernorm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=93), symbol:model.layers.0.post_attention_layernorm.weight])[symbol:model.layers.0.post_attention_layernorm.weight] + tensor.CPU.register () -> (%6720:tensor<[6144, 2048], Float32, CPU>[@model.layers.0.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=94), symbol:model.layers.0.mlp.up_proj.weight])[symbol:model.layers.0.mlp.up_proj.weight] + tensor.CPU.register () -> (%2083:tensor<[6144, 2048], Float32, CPU>[@model.layers.0.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=96), symbol:model.layers.0.mlp.gate_proj.weight])[symbol:model.layers.0.mlp.gate_proj.weight] + tensor.CPU.register () -> (%1968:tensor<[2048, 6144], Float32, CPU>[@model.layers.0.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=99), symbol:model.layers.0.mlp.down_proj.weight])[symbol:model.layers.0.mlp.down_proj.weight] + tensor.CPU.register () -> (%2912:tensor<[2048], Float32, CPU>[@model.layers.1.input_layernorm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=102), symbol:model.layers.1.input_layernorm.weight])[symbol:model.layers.1.input_layernorm.weight] + tensor.CPU.register () -> (%2564:tensor<[2048, 2048], Float32, CPU>[@model.layers.1.self_attn.q_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, 
scale_1_type: Float32), uuid=103), symbol:model.layers.1.self_attn.q_proj.weight])[symbol:model.layers.1.self_attn.q_proj.weight] + tensor.CPU.register () -> (%3192:tensor<[1024, 2048], Float32, CPU>[@model.layers.1.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=105), symbol:model.layers.1.self_attn.k_proj.weight])[symbol:model.layers.1.self_attn.k_proj.weight] + tensor.CPU.register () -> (%3127:tensor<[1024, 2048], Float32, CPU>[@model.layers.1.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=107), symbol:model.layers.1.self_attn.v_proj.weight])[symbol:model.layers.1.self_attn.v_proj.weight] + tensor.CPU.register () -> (%6782:tensor<[128], Float32, CPU>[@model.layers.1.self_attn.q_norm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=110), symbol:model.layers.1.self_attn.q_norm.weight])[symbol:model.layers.1.self_attn.q_norm.weight] + tensor.CPU.register () -> (%5890:tensor<[128], Float32, CPU>[@model.layers.1.self_attn.k_norm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=112), symbol:model.layers.1.self_attn.k_norm.weight])[symbol:model.layers.1.self_attn.k_norm.weight] + tensor.CPU.register () -> (%683:tensor<[2048, 2048], Float32, CPU>[@model.layers.1.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=125), symbol:model.layers.1.self_attn.o_proj.weight])[symbol:model.layers.1.self_attn.o_proj.weight] + tensor.CPU.register () -> 
(%181:tensor<[2048], Float32, CPU>[@model.layers.1.post_attention_layernorm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=128), symbol:model.layers.1.post_attention_layernorm.weight])[symbol:model.layers.1.post_attention_layernorm.weight] + tensor.CPU.register () -> (%2963:tensor<[6144, 2048], Float32, CPU>[@model.layers.1.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=129), symbol:model.layers.1.mlp.up_proj.weight])[symbol:model.layers.1.mlp.up_proj.weight] + tensor.CPU.register () -> (%5173:tensor<[6144, 2048], Float32, CPU>[@model.layers.1.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=131), symbol:model.layers.1.mlp.gate_proj.weight])[symbol:model.layers.1.mlp.gate_proj.weight] + tensor.CPU.register () -> (%5467:tensor<[2048, 6144], Float32, CPU>[@model.layers.1.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=134), symbol:model.layers.1.mlp.down_proj.weight])[symbol:model.layers.1.mlp.down_proj.weight] + tensor.CPU.register () -> (%2379:tensor<[2048], Float32, CPU>[@model.layers.2.input_layernorm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=137), symbol:model.layers.2.input_layernorm.weight])[symbol:model.layers.2.input_layernorm.weight] + tensor.CPU.register () -> (%3865:tensor<[2048, 2048], Float32, CPU>[@model.layers.2.self_attn.q_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, 
scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=138), symbol:model.layers.2.self_attn.q_proj.weight])[symbol:model.layers.2.self_attn.q_proj.weight] + tensor.CPU.register () -> (%1586:tensor<[1024, 2048], Float32, CPU>[@model.layers.2.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=140), symbol:model.layers.2.self_attn.k_proj.weight])[symbol:model.layers.2.self_attn.k_proj.weight] + tensor.CPU.register () -> (%4803:tensor<[1024, 2048], Float32, CPU>[@model.layers.2.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=142), symbol:model.layers.2.self_attn.v_proj.weight])[symbol:model.layers.2.self_attn.v_proj.weight] + tensor.CPU.register () -> (%6973:tensor<[128], Float32, CPU>[@model.layers.2.self_attn.q_norm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=145), symbol:model.layers.2.self_attn.q_norm.weight])[symbol:model.layers.2.self_attn.q_norm.weight] + tensor.CPU.register () -> (%1763:tensor<[128], Float32, CPU>[@model.layers.2.self_attn.k_norm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=147), symbol:model.layers.2.self_attn.k_norm.weight])[symbol:model.layers.2.self_attn.k_norm.weight] + tensor.CPU.register () -> (%6817:tensor<[2048, 2048], Float32, CPU>[@model.layers.2.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=160), 
symbol:model.layers.2.self_attn.o_proj.weight])[symbol:model.layers.2.self_attn.o_proj.weight] + tensor.CPU.register () -> (%984:tensor<[2048], Float32, CPU>[@model.layers.2.post_attention_layernorm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=163), symbol:model.layers.2.post_attention_layernorm.weight])[symbol:model.layers.2.post_attention_layernorm.weight] + tensor.CPU.register () -> (%1952:tensor<[6144, 2048], Float32, CPU>[@model.layers.2.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=164), symbol:model.layers.2.mlp.up_proj.weight])[symbol:model.layers.2.mlp.up_proj.weight] + tensor.CPU.register () -> (%6793:tensor<[6144, 2048], Float32, CPU>[@model.layers.2.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=166), symbol:model.layers.2.mlp.gate_proj.weight])[symbol:model.layers.2.mlp.gate_proj.weight] + tensor.CPU.register () -> (%7125:tensor<[2048, 6144], Float32, CPU>[@model.layers.2.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=169), symbol:model.layers.2.mlp.down_proj.weight])[symbol:model.layers.2.mlp.down_proj.weight] + tensor.CPU.register () -> (%1636:tensor<[2048], Float32, CPU>[@model.layers.3.input_layernorm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=172), symbol:model.layers.3.input_layernorm.weight])[symbol:model.layers.3.input_layernorm.weight] + tensor.CPU.register () -> (%5214:tensor<[2048, 2048], Float32, 
CPU>[@model.layers.3.self_attn.q_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=173), symbol:model.layers.3.self_attn.q_proj.weight])[symbol:model.layers.3.self_attn.q_proj.weight] + tensor.CPU.register () -> (%6900:tensor<[1024, 2048], Float32, CPU>[@model.layers.3.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=175), symbol:model.layers.3.self_attn.k_proj.weight])[symbol:model.layers.3.self_attn.k_proj.weight] + tensor.CPU.register () -> (%2141:tensor<[1024, 2048], Float32, CPU>[@model.layers.3.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=177), symbol:model.layers.3.self_attn.v_proj.weight])[symbol:model.layers.3.self_attn.v_proj.weight] + tensor.CPU.register () -> (%3669:tensor<[128], Float32, CPU>[@model.layers.3.self_attn.q_norm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=180), symbol:model.layers.3.self_attn.q_norm.weight])[symbol:model.layers.3.self_attn.q_norm.weight] + tensor.CPU.register () -> (%4334:tensor<[128], Float32, CPU>[@model.layers.3.self_attn.k_norm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=182), symbol:model.layers.3.self_attn.k_norm.weight])[symbol:model.layers.3.self_attn.k_norm.weight] + tensor.CPU.register () -> (%8150:tensor<[2048, 2048], Float32, CPU>[@model.layers.3.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: 
UInt4, scale_1_type: Float32), uuid=195), symbol:model.layers.3.self_attn.o_proj.weight])[symbol:model.layers.3.self_attn.o_proj.weight] + tensor.CPU.register () -> (%4105:tensor<[2048], Float32, CPU>[@model.layers.3.post_attention_layernorm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=198), symbol:model.layers.3.post_attention_layernorm.weight])[symbol:model.layers.3.post_attention_layernorm.weight] + tensor.CPU.register () -> (%6926:tensor<[6144, 2048], Float32, CPU>[@model.layers.3.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=199), symbol:model.layers.3.mlp.up_proj.weight])[symbol:model.layers.3.mlp.up_proj.weight] + tensor.CPU.register () -> (%6632:tensor<[6144, 2048], Float32, CPU>[@model.layers.3.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=201), symbol:model.layers.3.mlp.gate_proj.weight])[symbol:model.layers.3.mlp.gate_proj.weight] + tensor.CPU.register () -> (%1818:tensor<[2048, 6144], Float32, CPU>[@model.layers.3.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=204), symbol:model.layers.3.mlp.down_proj.weight])[symbol:model.layers.3.mlp.down_proj.weight] + tensor.CPU.register () -> (%269:tensor<[2048], Float32, CPU>[@model.layers.4.input_layernorm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=207), symbol:model.layers.4.input_layernorm.weight])[symbol:model.layers.4.input_layernorm.weight] + tensor.CPU.register () -> (%973:tensor<[2048, 
2048], Float32, CPU>[@model.layers.4.self_attn.q_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=208), symbol:model.layers.4.self_attn.q_proj.weight])[symbol:model.layers.4.self_attn.q_proj.weight] + tensor.CPU.register () -> (%6187:tensor<[1024, 2048], Float32, CPU>[@model.layers.4.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=210), symbol:model.layers.4.self_attn.k_proj.weight])[symbol:model.layers.4.self_attn.k_proj.weight] + tensor.CPU.register () -> (%6381:tensor<[1024, 2048], Float32, CPU>[@model.layers.4.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=212), symbol:model.layers.4.self_attn.v_proj.weight])[symbol:model.layers.4.self_attn.v_proj.weight] + tensor.CPU.register () -> (%466:tensor<[128], Float32, CPU>[@model.layers.4.self_attn.q_norm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=215), symbol:model.layers.4.self_attn.q_norm.weight])[symbol:model.layers.4.self_attn.q_norm.weight] + tensor.CPU.register () -> (%6834:tensor<[128], Float32, CPU>[@model.layers.4.self_attn.k_norm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=217), symbol:model.layers.4.self_attn.k_norm.weight])[symbol:model.layers.4.self_attn.k_norm.weight] + tensor.CPU.register () -> (%7756:tensor<[2048, 2048], Float32, CPU>[@model.layers.4.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, 
quant_to_type: UInt4, scale_1_type: Float32), uuid=230), symbol:model.layers.4.self_attn.o_proj.weight])[symbol:model.layers.4.self_attn.o_proj.weight] + tensor.CPU.register () -> (%4372:tensor<[2048], Float32, CPU>[@model.layers.4.post_attention_layernorm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=233), symbol:model.layers.4.post_attention_layernorm.weight])[symbol:model.layers.4.post_attention_layernorm.weight] + tensor.CPU.register () -> (%6103:tensor<[6144, 2048], Float32, CPU>[@model.layers.4.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=234), symbol:model.layers.4.mlp.up_proj.weight])[symbol:model.layers.4.mlp.up_proj.weight] + tensor.CPU.register () -> (%2402:tensor<[6144, 2048], Float32, CPU>[@model.layers.4.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=236), symbol:model.layers.4.mlp.gate_proj.weight])[symbol:model.layers.4.mlp.gate_proj.weight] + tensor.CPU.register () -> (%355:tensor<[2048, 6144], Float32, CPU>[@model.layers.4.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=239), symbol:model.layers.4.mlp.down_proj.weight])[symbol:model.layers.4.mlp.down_proj.weight] + tensor.CPU.register () -> (%7342:tensor<[2048], Float32, CPU>[@model.layers.5.input_layernorm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=242), symbol:model.layers.5.input_layernorm.weight])[symbol:model.layers.5.input_layernorm.weight] + tensor.CPU.register () -> 
(%756:tensor<[2048, 2048], Float32, CPU>[@model.layers.5.self_attn.q_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=243), symbol:model.layers.5.self_attn.q_proj.weight])[symbol:model.layers.5.self_attn.q_proj.weight] + tensor.CPU.register () -> (%7540:tensor<[1024, 2048], Float32, CPU>[@model.layers.5.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=245), symbol:model.layers.5.self_attn.k_proj.weight])[symbol:model.layers.5.self_attn.k_proj.weight] + tensor.CPU.register () -> (%1477:tensor<[1024, 2048], Float32, CPU>[@model.layers.5.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=247), symbol:model.layers.5.self_attn.v_proj.weight])[symbol:model.layers.5.self_attn.v_proj.weight] + tensor.CPU.register () -> (%3429:tensor<[128], Float32, CPU>[@model.layers.5.self_attn.q_norm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=250), symbol:model.layers.5.self_attn.q_norm.weight])[symbol:model.layers.5.self_attn.q_norm.weight] + tensor.CPU.register () -> (%2834:tensor<[128], Float32, CPU>[@model.layers.5.self_attn.k_norm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=252), symbol:model.layers.5.self_attn.k_norm.weight])[symbol:model.layers.5.self_attn.k_norm.weight] + tensor.CPU.register () -> (%8077:tensor<[2048, 2048], Float32, CPU>[@model.layers.5.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, 
scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=265), symbol:model.layers.5.self_attn.o_proj.weight])[symbol:model.layers.5.self_attn.o_proj.weight] + tensor.CPU.register () -> (%5901:tensor<[2048], Float32, CPU>[@model.layers.5.post_attention_layernorm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=268), symbol:model.layers.5.post_attention_layernorm.weight])[symbol:model.layers.5.post_attention_layernorm.weight] + tensor.CPU.register () -> (%769:tensor<[6144, 2048], Float32, CPU>[@model.layers.5.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=269), symbol:model.layers.5.mlp.up_proj.weight])[symbol:model.layers.5.mlp.up_proj.weight] + tensor.CPU.register () -> (%1874:tensor<[6144, 2048], Float32, CPU>[@model.layers.5.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=271), symbol:model.layers.5.mlp.gate_proj.weight])[symbol:model.layers.5.mlp.gate_proj.weight] + tensor.CPU.register () -> (%4892:tensor<[2048, 6144], Float32, CPU>[@model.layers.5.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=274), symbol:model.layers.5.mlp.down_proj.weight])[symbol:model.layers.5.mlp.down_proj.weight] + tensor.CPU.register () -> (%3540:tensor<[2048], Float32, CPU>[@model.layers.6.input_layernorm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=277), symbol:model.layers.6.input_layernorm.weight])[symbol:model.layers.6.input_layernorm.weight] + 
tensor.CPU.register () -> (%4173:tensor<[2048, 2048], Float32, CPU>[@model.layers.6.self_attn.q_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=278), symbol:model.layers.6.self_attn.q_proj.weight])[symbol:model.layers.6.self_attn.q_proj.weight] + tensor.CPU.register () -> (%877:tensor<[1024, 2048], Float32, CPU>[@model.layers.6.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=280), symbol:model.layers.6.self_attn.k_proj.weight])[symbol:model.layers.6.self_attn.k_proj.weight] + tensor.CPU.register () -> (%1344:tensor<[1024, 2048], Float32, CPU>[@model.layers.6.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=282), symbol:model.layers.6.self_attn.v_proj.weight])[symbol:model.layers.6.self_attn.v_proj.weight] + tensor.CPU.register () -> (%7487:tensor<[128], Float32, CPU>[@model.layers.6.self_attn.q_norm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=285), symbol:model.layers.6.self_attn.q_norm.weight])[symbol:model.layers.6.self_attn.q_norm.weight] + tensor.CPU.register () -> (%5126:tensor<[128], Float32, CPU>[@model.layers.6.self_attn.k_norm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=287), symbol:model.layers.6.self_attn.k_norm.weight])[symbol:model.layers.6.self_attn.k_norm.weight] + tensor.CPU.register () -> (%3940:tensor<[2048, 2048], Float32, CPU>[@model.layers.6.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, 
block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=300), symbol:model.layers.6.self_attn.o_proj.weight])[symbol:model.layers.6.self_attn.o_proj.weight] + tensor.CPU.register () -> (%5378:tensor<[2048], Float32, CPU>[@model.layers.6.post_attention_layernorm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=303), symbol:model.layers.6.post_attention_layernorm.weight])[symbol:model.layers.6.post_attention_layernorm.weight] + tensor.CPU.register () -> (%4973:tensor<[6144, 2048], Float32, CPU>[@model.layers.6.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=304), symbol:model.layers.6.mlp.up_proj.weight])[symbol:model.layers.6.mlp.up_proj.weight] + tensor.CPU.register () -> (%7150:tensor<[6144, 2048], Float32, CPU>[@model.layers.6.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=306), symbol:model.layers.6.mlp.gate_proj.weight])[symbol:model.layers.6.mlp.gate_proj.weight] + tensor.CPU.register () -> (%5276:tensor<[2048, 6144], Float32, CPU>[@model.layers.6.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=309), symbol:model.layers.6.mlp.down_proj.weight])[symbol:model.layers.6.mlp.down_proj.weight] + tensor.CPU.register () -> (%1865:tensor<[2048], Float32, CPU>[@model.layers.7.input_layernorm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=312), 
symbol:model.layers.7.input_layernorm.weight])[symbol:model.layers.7.input_layernorm.weight] + tensor.CPU.register () -> (%7715:tensor<[2048, 2048], Float32, CPU>[@model.layers.7.self_attn.q_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=313), symbol:model.layers.7.self_attn.q_proj.weight])[symbol:model.layers.7.self_attn.q_proj.weight] + tensor.CPU.register () -> (%1658:tensor<[1024, 2048], Float32, CPU>[@model.layers.7.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=315), symbol:model.layers.7.self_attn.k_proj.weight])[symbol:model.layers.7.self_attn.k_proj.weight] + tensor.CPU.register () -> (%5896:tensor<[1024, 2048], Float32, CPU>[@model.layers.7.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=317), symbol:model.layers.7.self_attn.v_proj.weight])[symbol:model.layers.7.self_attn.v_proj.weight] + tensor.CPU.register () -> (%7733:tensor<[128], Float32, CPU>[@model.layers.7.self_attn.q_norm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=320), symbol:model.layers.7.self_attn.q_norm.weight])[symbol:model.layers.7.self_attn.q_norm.weight] + tensor.CPU.register () -> (%1643:tensor<[128], Float32, CPU>[@model.layers.7.self_attn.k_norm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=322), symbol:model.layers.7.self_attn.k_norm.weight])[symbol:model.layers.7.self_attn.k_norm.weight] + tensor.CPU.register () -> (%2968:tensor<[2048, 2048], Float32, 
CPU>[@model.layers.7.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=335), symbol:model.layers.7.self_attn.o_proj.weight])[symbol:model.layers.7.self_attn.o_proj.weight] + tensor.CPU.register () -> (%2978:tensor<[2048], Float32, CPU>[@model.layers.7.post_attention_layernorm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=338), symbol:model.layers.7.post_attention_layernorm.weight])[symbol:model.layers.7.post_attention_layernorm.weight] + tensor.CPU.register () -> (%2994:tensor<[6144, 2048], Float32, CPU>[@model.layers.7.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=339), symbol:model.layers.7.mlp.up_proj.weight])[symbol:model.layers.7.mlp.up_proj.weight] + tensor.CPU.register () -> (%6231:tensor<[6144, 2048], Float32, CPU>[@model.layers.7.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=341), symbol:model.layers.7.mlp.gate_proj.weight])[symbol:model.layers.7.mlp.gate_proj.weight] + tensor.CPU.register () -> (%7639:tensor<[2048, 6144], Float32, CPU>[@model.layers.7.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=344), symbol:model.layers.7.mlp.down_proj.weight])[symbol:model.layers.7.mlp.down_proj.weight] + tensor.CPU.register () -> (%2157:tensor<[2048], Float32, CPU>[@model.layers.8.input_layernorm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=347), symbol:model.layers.8.input_layernorm.weight])[symbol:model.layers.8.input_layernorm.weight] + tensor.CPU.register () -> (%7895:tensor<[2048, 2048], Float32, CPU>[@model.layers.8.self_attn.q_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=348), symbol:model.layers.8.self_attn.q_proj.weight])[symbol:model.layers.8.self_attn.q_proj.weight] + tensor.CPU.register () -> (%2622:tensor<[1024, 2048], Float32, CPU>[@model.layers.8.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=350), symbol:model.layers.8.self_attn.k_proj.weight])[symbol:model.layers.8.self_attn.k_proj.weight] + tensor.CPU.register () -> (%5444:tensor<[1024, 2048], Float32, CPU>[@model.layers.8.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=352), symbol:model.layers.8.self_attn.v_proj.weight])[symbol:model.layers.8.self_attn.v_proj.weight] + tensor.CPU.register () -> (%1167:tensor<[128], Float32, CPU>[@model.layers.8.self_attn.q_norm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=355), symbol:model.layers.8.self_attn.q_norm.weight])[symbol:model.layers.8.self_attn.q_norm.weight] + tensor.CPU.register () -> (%7773:tensor<[128], Float32, CPU>[@model.layers.8.self_attn.k_norm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=357), symbol:model.layers.8.self_attn.k_norm.weight])[symbol:model.layers.8.self_attn.k_norm.weight] + tensor.CPU.register () -> 
(%2063:tensor<[2048, 2048], Float32, CPU>[@model.layers.8.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=370), symbol:model.layers.8.self_attn.o_proj.weight])[symbol:model.layers.8.self_attn.o_proj.weight] + tensor.CPU.register () -> (%4799:tensor<[2048], Float32, CPU>[@model.layers.8.post_attention_layernorm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=373), symbol:model.layers.8.post_attention_layernorm.weight])[symbol:model.layers.8.post_attention_layernorm.weight] + tensor.CPU.register () -> (%5512:tensor<[6144, 2048], Float32, CPU>[@model.layers.8.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=374), symbol:model.layers.8.mlp.up_proj.weight])[symbol:model.layers.8.mlp.up_proj.weight] + tensor.CPU.register () -> (%4801:tensor<[6144, 2048], Float32, CPU>[@model.layers.8.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=376), symbol:model.layers.8.mlp.gate_proj.weight])[symbol:model.layers.8.mlp.gate_proj.weight] + tensor.CPU.register () -> (%5712:tensor<[2048, 6144], Float32, CPU>[@model.layers.8.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=379), symbol:model.layers.8.mlp.down_proj.weight])[symbol:model.layers.8.mlp.down_proj.weight] + tensor.CPU.register () -> (%3935:tensor<[2048], Float32, CPU>[@model.layers.9.input_layernorm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, 
quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=382), symbol:model.layers.9.input_layernorm.weight])[symbol:model.layers.9.input_layernorm.weight] + tensor.CPU.register () -> (%1754:tensor<[2048, 2048], Float32, CPU>[@model.layers.9.self_attn.q_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=383), symbol:model.layers.9.self_attn.q_proj.weight])[symbol:model.layers.9.self_attn.q_proj.weight] + tensor.CPU.register () -> (%7274:tensor<[1024, 2048], Float32, CPU>[@model.layers.9.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=385), symbol:model.layers.9.self_attn.k_proj.weight])[symbol:model.layers.9.self_attn.k_proj.weight] + tensor.CPU.register () -> (%4983:tensor<[1024, 2048], Float32, CPU>[@model.layers.9.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=387), symbol:model.layers.9.self_attn.v_proj.weight])[symbol:model.layers.9.self_attn.v_proj.weight] + tensor.CPU.register () -> (%1127:tensor<[128], Float32, CPU>[@model.layers.9.self_attn.q_norm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=390), symbol:model.layers.9.self_attn.q_norm.weight])[symbol:model.layers.9.self_attn.q_norm.weight] + tensor.CPU.register () -> (%964:tensor<[128], Float32, CPU>[@model.layers.9.self_attn.k_norm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=392), symbol:model.layers.9.self_attn.k_norm.weight])[symbol:model.layers.9.self_attn.k_norm.weight] + 
tensor.CPU.register () -> (%4355:tensor<[2048, 2048], Float32, CPU>[@model.layers.9.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=405), symbol:model.layers.9.self_attn.o_proj.weight])[symbol:model.layers.9.self_attn.o_proj.weight] + tensor.CPU.register () -> (%4793:tensor<[2048], Float32, CPU>[@model.layers.9.post_attention_layernorm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=408), symbol:model.layers.9.post_attention_layernorm.weight])[symbol:model.layers.9.post_attention_layernorm.weight] + tensor.CPU.register () -> (%7662:tensor<[6144, 2048], Float32, CPU>[@model.layers.9.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=409), symbol:model.layers.9.mlp.up_proj.weight])[symbol:model.layers.9.mlp.up_proj.weight] + tensor.CPU.register () -> (%6098:tensor<[6144, 2048], Float32, CPU>[@model.layers.9.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=411), symbol:model.layers.9.mlp.gate_proj.weight])[symbol:model.layers.9.mlp.gate_proj.weight] + tensor.CPU.register () -> (%333:tensor<[2048, 6144], Float32, CPU>[@model.layers.9.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=414), symbol:model.layers.9.mlp.down_proj.weight])[symbol:model.layers.9.mlp.down_proj.weight] + tensor.CPU.register () -> (%3044:tensor<[2048], Float32, CPU>[@model.layers.10.input_layernorm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, 
quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=417), symbol:model.layers.10.input_layernorm.weight])[symbol:model.layers.10.input_layernorm.weight] + tensor.CPU.register () -> (%208:tensor<[2048, 2048], Float32, CPU>[@model.layers.10.self_attn.q_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=418), symbol:model.layers.10.self_attn.q_proj.weight])[symbol:model.layers.10.self_attn.q_proj.weight] + tensor.CPU.register () -> (%5527:tensor<[1024, 2048], Float32, CPU>[@model.layers.10.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=420), symbol:model.layers.10.self_attn.k_proj.weight])[symbol:model.layers.10.self_attn.k_proj.weight] + tensor.CPU.register () -> (%2767:tensor<[1024, 2048], Float32, CPU>[@model.layers.10.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=422), symbol:model.layers.10.self_attn.v_proj.weight])[symbol:model.layers.10.self_attn.v_proj.weight] + tensor.CPU.register () -> (%6433:tensor<[128], Float32, CPU>[@model.layers.10.self_attn.q_norm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=425), symbol:model.layers.10.self_attn.q_norm.weight])[symbol:model.layers.10.self_attn.q_norm.weight] + tensor.CPU.register () -> (%1215:tensor<[128], Float32, CPU>[@model.layers.10.self_attn.k_norm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=427), 
symbol:model.layers.10.self_attn.k_norm.weight])[symbol:model.layers.10.self_attn.k_norm.weight] + tensor.CPU.register () -> (%2136:tensor<[2048, 2048], Float32, CPU>[@model.layers.10.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=440), symbol:model.layers.10.self_attn.o_proj.weight])[symbol:model.layers.10.self_attn.o_proj.weight] + tensor.CPU.register () -> (%1173:tensor<[2048], Float32, CPU>[@model.layers.10.post_attention_layernorm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=443), symbol:model.layers.10.post_attention_layernorm.weight])[symbol:model.layers.10.post_attention_layernorm.weight] + tensor.CPU.register () -> (%4087:tensor<[6144, 2048], Float32, CPU>[@model.layers.10.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=444), symbol:model.layers.10.mlp.up_proj.weight])[symbol:model.layers.10.mlp.up_proj.weight] + tensor.CPU.register () -> (%6334:tensor<[6144, 2048], Float32, CPU>[@model.layers.10.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=446), symbol:model.layers.10.mlp.gate_proj.weight])[symbol:model.layers.10.mlp.gate_proj.weight] + tensor.CPU.register () -> (%2160:tensor<[2048, 6144], Float32, CPU>[@model.layers.10.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=449), symbol:model.layers.10.mlp.down_proj.weight])[symbol:model.layers.10.mlp.down_proj.weight] + tensor.CPU.register () -> 
(%6029:tensor<[2048], Float32, CPU>[@model.layers.11.input_layernorm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=452), symbol:model.layers.11.input_layernorm.weight])[symbol:model.layers.11.input_layernorm.weight] + tensor.CPU.register () -> (%87:tensor<[2048, 2048], Float32, CPU>[@model.layers.11.self_attn.q_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=453), symbol:model.layers.11.self_attn.q_proj.weight])[symbol:model.layers.11.self_attn.q_proj.weight] + tensor.CPU.register () -> (%6705:tensor<[1024, 2048], Float32, CPU>[@model.layers.11.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=455), symbol:model.layers.11.self_attn.k_proj.weight])[symbol:model.layers.11.self_attn.k_proj.weight] + tensor.CPU.register () -> (%532:tensor<[1024, 2048], Float32, CPU>[@model.layers.11.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=457), symbol:model.layers.11.self_attn.v_proj.weight])[symbol:model.layers.11.self_attn.v_proj.weight] + tensor.CPU.register () -> (%2075:tensor<[128], Float32, CPU>[@model.layers.11.self_attn.q_norm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=460), symbol:model.layers.11.self_attn.q_norm.weight])[symbol:model.layers.11.self_attn.q_norm.weight] + tensor.CPU.register () -> (%5298:tensor<[128], Float32, CPU>[@model.layers.11.self_attn.k_norm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: 
UInt16, scale_type: Float32, zero_point_type: Int32), uuid=462), symbol:model.layers.11.self_attn.k_norm.weight])[symbol:model.layers.11.self_attn.k_norm.weight] + tensor.CPU.register () -> (%6489:tensor<[2048, 2048], Float32, CPU>[@model.layers.11.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=475), symbol:model.layers.11.self_attn.o_proj.weight])[symbol:model.layers.11.self_attn.o_proj.weight] + tensor.CPU.register () -> (%407:tensor<[2048], Float32, CPU>[@model.layers.11.post_attention_layernorm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=478), symbol:model.layers.11.post_attention_layernorm.weight])[symbol:model.layers.11.post_attention_layernorm.weight] + tensor.CPU.register () -> (%6171:tensor<[6144, 2048], Float32, CPU>[@model.layers.11.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=479), symbol:model.layers.11.mlp.up_proj.weight])[symbol:model.layers.11.mlp.up_proj.weight] + tensor.CPU.register () -> (%8146:tensor<[6144, 2048], Float32, CPU>[@model.layers.11.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=481), symbol:model.layers.11.mlp.gate_proj.weight])[symbol:model.layers.11.mlp.gate_proj.weight] + tensor.CPU.register () -> (%575:tensor<[2048, 6144], Float32, CPU>[@model.layers.11.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=484), 
symbol:model.layers.11.mlp.down_proj.weight])[symbol:model.layers.11.mlp.down_proj.weight] + tensor.CPU.register () -> (%861:tensor<[2048], Float32, CPU>[@model.layers.12.input_layernorm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=487), symbol:model.layers.12.input_layernorm.weight])[symbol:model.layers.12.input_layernorm.weight] + tensor.CPU.register () -> (%1138:tensor<[2048, 2048], Float32, CPU>[@model.layers.12.self_attn.q_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=488), symbol:model.layers.12.self_attn.q_proj.weight])[symbol:model.layers.12.self_attn.q_proj.weight] + tensor.CPU.register () -> (%8178:tensor<[1024, 2048], Float32, CPU>[@model.layers.12.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=490), symbol:model.layers.12.self_attn.k_proj.weight])[symbol:model.layers.12.self_attn.k_proj.weight] + tensor.CPU.register () -> (%5503:tensor<[1024, 2048], Float32, CPU>[@model.layers.12.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=492), symbol:model.layers.12.self_attn.v_proj.weight])[symbol:model.layers.12.self_attn.v_proj.weight] + tensor.CPU.register () -> (%5531:tensor<[128], Float32, CPU>[@model.layers.12.self_attn.q_norm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=495), symbol:model.layers.12.self_attn.q_norm.weight])[symbol:model.layers.12.self_attn.q_norm.weight] + tensor.CPU.register () -> (%7120:tensor<[128], Float32, 
CPU>[@model.layers.12.self_attn.k_norm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=497), symbol:model.layers.12.self_attn.k_norm.weight])[symbol:model.layers.12.self_attn.k_norm.weight] + tensor.CPU.register () -> (%3812:tensor<[2048, 2048], Float32, CPU>[@model.layers.12.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=510), symbol:model.layers.12.self_attn.o_proj.weight])[symbol:model.layers.12.self_attn.o_proj.weight] + tensor.CPU.register () -> (%5701:tensor<[2048], Float32, CPU>[@model.layers.12.post_attention_layernorm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=513), symbol:model.layers.12.post_attention_layernorm.weight])[symbol:model.layers.12.post_attention_layernorm.weight] + tensor.CPU.register () -> (%1006:tensor<[6144, 2048], Float32, CPU>[@model.layers.12.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=514), symbol:model.layers.12.mlp.up_proj.weight])[symbol:model.layers.12.mlp.up_proj.weight] + tensor.CPU.register () -> (%4400:tensor<[6144, 2048], Float32, CPU>[@model.layers.12.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=516), symbol:model.layers.12.mlp.gate_proj.weight])[symbol:model.layers.12.mlp.gate_proj.weight] + tensor.CPU.register () -> (%6759:tensor<[2048, 6144], Float32, CPU>[@model.layers.12.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, 
scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=519), symbol:model.layers.12.mlp.down_proj.weight])[symbol:model.layers.12.mlp.down_proj.weight] + tensor.CPU.register () -> (%4069:tensor<[2048], Float32, CPU>[@model.layers.13.input_layernorm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=522), symbol:model.layers.13.input_layernorm.weight])[symbol:model.layers.13.input_layernorm.weight] + tensor.CPU.register () -> (%6517:tensor<[2048, 2048], Float32, CPU>[@model.layers.13.self_attn.q_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=523), symbol:model.layers.13.self_attn.q_proj.weight])[symbol:model.layers.13.self_attn.q_proj.weight] + tensor.CPU.register () -> (%7247:tensor<[1024, 2048], Float32, CPU>[@model.layers.13.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=525), symbol:model.layers.13.self_attn.k_proj.weight])[symbol:model.layers.13.self_attn.k_proj.weight] + tensor.CPU.register () -> (%4830:tensor<[1024, 2048], Float32, CPU>[@model.layers.13.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=527), symbol:model.layers.13.self_attn.v_proj.weight])[symbol:model.layers.13.self_attn.v_proj.weight] + tensor.CPU.register () -> (%7510:tensor<[128], Float32, CPU>[@model.layers.13.self_attn.q_norm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=530), 
symbol:model.layers.13.self_attn.q_norm.weight])[symbol:model.layers.13.self_attn.q_norm.weight] + tensor.CPU.register () -> (%1546:tensor<[128], Float32, CPU>[@model.layers.13.self_attn.k_norm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=532), symbol:model.layers.13.self_attn.k_norm.weight])[symbol:model.layers.13.self_attn.k_norm.weight] + tensor.CPU.register () -> (%4956:tensor<[2048, 2048], Float32, CPU>[@model.layers.13.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=545), symbol:model.layers.13.self_attn.o_proj.weight])[symbol:model.layers.13.self_attn.o_proj.weight] + tensor.CPU.register () -> (%1863:tensor<[2048], Float32, CPU>[@model.layers.13.post_attention_layernorm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=548), symbol:model.layers.13.post_attention_layernorm.weight])[symbol:model.layers.13.post_attention_layernorm.weight] + tensor.CPU.register () -> (%4198:tensor<[6144, 2048], Float32, CPU>[@model.layers.13.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=549), symbol:model.layers.13.mlp.up_proj.weight])[symbol:model.layers.13.mlp.up_proj.weight] + tensor.CPU.register () -> (%3651:tensor<[6144, 2048], Float32, CPU>[@model.layers.13.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=551), symbol:model.layers.13.mlp.gate_proj.weight])[symbol:model.layers.13.mlp.gate_proj.weight] + tensor.CPU.register () -> (%5457:tensor<[2048, 6144], 
Float32, CPU>[@model.layers.13.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=554), symbol:model.layers.13.mlp.down_proj.weight])[symbol:model.layers.13.mlp.down_proj.weight] + tensor.CPU.register () -> (%4807:tensor<[2048], Float32, CPU>[@model.layers.14.input_layernorm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=557), symbol:model.layers.14.input_layernorm.weight])[symbol:model.layers.14.input_layernorm.weight] + tensor.CPU.register () -> (%2924:tensor<[2048, 2048], Float32, CPU>[@model.layers.14.self_attn.q_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=558), symbol:model.layers.14.self_attn.q_proj.weight])[symbol:model.layers.14.self_attn.q_proj.weight] + tensor.CPU.register () -> (%6136:tensor<[1024, 2048], Float32, CPU>[@model.layers.14.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=560), symbol:model.layers.14.self_attn.k_proj.weight])[symbol:model.layers.14.self_attn.k_proj.weight] + tensor.CPU.register () -> (%5240:tensor<[1024, 2048], Float32, CPU>[@model.layers.14.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=562), symbol:model.layers.14.self_attn.v_proj.weight])[symbol:model.layers.14.self_attn.v_proj.weight] + tensor.CPU.register () -> (%3852:tensor<[128], Float32, CPU>[@model.layers.14.self_attn.q_norm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, 
scale_type: Float32, zero_point_type: Int32), uuid=565), symbol:model.layers.14.self_attn.q_norm.weight])[symbol:model.layers.14.self_attn.q_norm.weight] + tensor.CPU.register () -> (%5634:tensor<[128], Float32, CPU>[@model.layers.14.self_attn.k_norm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=567), symbol:model.layers.14.self_attn.k_norm.weight])[symbol:model.layers.14.self_attn.k_norm.weight] + tensor.CPU.register () -> (%331:tensor<[2048, 2048], Float32, CPU>[@model.layers.14.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=580), symbol:model.layers.14.self_attn.o_proj.weight])[symbol:model.layers.14.self_attn.o_proj.weight] + tensor.CPU.register () -> (%7059:tensor<[2048], Float32, CPU>[@model.layers.14.post_attention_layernorm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=583), symbol:model.layers.14.post_attention_layernorm.weight])[symbol:model.layers.14.post_attention_layernorm.weight] + tensor.CPU.register () -> (%631:tensor<[6144, 2048], Float32, CPU>[@model.layers.14.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=584), symbol:model.layers.14.mlp.up_proj.weight])[symbol:model.layers.14.mlp.up_proj.weight] + tensor.CPU.register () -> (%2479:tensor<[6144, 2048], Float32, CPU>[@model.layers.14.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=586), symbol:model.layers.14.mlp.gate_proj.weight])[symbol:model.layers.14.mlp.gate_proj.weight] + 
tensor.CPU.register () -> (%4629:tensor<[2048, 6144], Float32, CPU>[@model.layers.14.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=589), symbol:model.layers.14.mlp.down_proj.weight])[symbol:model.layers.14.mlp.down_proj.weight] + tensor.CPU.register () -> (%1464:tensor<[2048], Float32, CPU>[@model.layers.15.input_layernorm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=592), symbol:model.layers.15.input_layernorm.weight])[symbol:model.layers.15.input_layernorm.weight] + tensor.CPU.register () -> (%4989:tensor<[2048, 2048], Float32, CPU>[@model.layers.15.self_attn.q_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=593), symbol:model.layers.15.self_attn.q_proj.weight])[symbol:model.layers.15.self_attn.q_proj.weight] + tensor.CPU.register () -> (%2031:tensor<[1024, 2048], Float32, CPU>[@model.layers.15.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=595), symbol:model.layers.15.self_attn.k_proj.weight])[symbol:model.layers.15.self_attn.k_proj.weight] + tensor.CPU.register () -> (%1922:tensor<[1024, 2048], Float32, CPU>[@model.layers.15.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=597), symbol:model.layers.15.self_attn.v_proj.weight])[symbol:model.layers.15.self_attn.v_proj.weight] + tensor.CPU.register () -> (%6176:tensor<[128], Float32, 
CPU>[@model.layers.15.self_attn.q_norm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=600), symbol:model.layers.15.self_attn.q_norm.weight])[symbol:model.layers.15.self_attn.q_norm.weight] + tensor.CPU.register () -> (%5870:tensor<[128], Float32, CPU>[@model.layers.15.self_attn.k_norm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=602), symbol:model.layers.15.self_attn.k_norm.weight])[symbol:model.layers.15.self_attn.k_norm.weight] + tensor.CPU.register () -> (%6498:tensor<[2048, 2048], Float32, CPU>[@model.layers.15.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=615), symbol:model.layers.15.self_attn.o_proj.weight])[symbol:model.layers.15.self_attn.o_proj.weight] + tensor.CPU.register () -> (%7534:tensor<[2048], Float32, CPU>[@model.layers.15.post_attention_layernorm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=618), symbol:model.layers.15.post_attention_layernorm.weight])[symbol:model.layers.15.post_attention_layernorm.weight] + tensor.CPU.register () -> (%4158:tensor<[6144, 2048], Float32, CPU>[@model.layers.15.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=619), symbol:model.layers.15.mlp.up_proj.weight])[symbol:model.layers.15.mlp.up_proj.weight] + tensor.CPU.register () -> (%5708:tensor<[6144, 2048], Float32, CPU>[@model.layers.15.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: 
UInt4, scale_1_type: Float32), uuid=621), symbol:model.layers.15.mlp.gate_proj.weight])[symbol:model.layers.15.mlp.gate_proj.weight] + tensor.CPU.register () -> (%6996:tensor<[2048, 6144], Float32, CPU>[@model.layers.15.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=624), symbol:model.layers.15.mlp.down_proj.weight])[symbol:model.layers.15.mlp.down_proj.weight] + tensor.CPU.register () -> (%5186:tensor<[2048], Float32, CPU>[@model.layers.16.input_layernorm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=627), symbol:model.layers.16.input_layernorm.weight])[symbol:model.layers.16.input_layernorm.weight] + tensor.CPU.register () -> (%3600:tensor<[2048, 2048], Float32, CPU>[@model.layers.16.self_attn.q_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=628), symbol:model.layers.16.self_attn.q_proj.weight])[symbol:model.layers.16.self_attn.q_proj.weight] + tensor.CPU.register () -> (%7334:tensor<[1024, 2048], Float32, CPU>[@model.layers.16.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=630), symbol:model.layers.16.self_attn.k_proj.weight])[symbol:model.layers.16.self_attn.k_proj.weight] + tensor.CPU.register () -> (%1736:tensor<[1024, 2048], Float32, CPU>[@model.layers.16.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=632), symbol:model.layers.16.self_attn.v_proj.weight])[symbol:model.layers.16.self_attn.v_proj.weight] + 
tensor.CPU.register () -> (%8015:tensor<[128], Float32, CPU>[@model.layers.16.self_attn.q_norm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=635), symbol:model.layers.16.self_attn.q_norm.weight])[symbol:model.layers.16.self_attn.q_norm.weight] + tensor.CPU.register () -> (%8043:tensor<[128], Float32, CPU>[@model.layers.16.self_attn.k_norm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=637), symbol:model.layers.16.self_attn.k_norm.weight])[symbol:model.layers.16.self_attn.k_norm.weight] + tensor.CPU.register () -> (%1749:tensor<[2048, 2048], Float32, CPU>[@model.layers.16.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=650), symbol:model.layers.16.self_attn.o_proj.weight])[symbol:model.layers.16.self_attn.o_proj.weight] + tensor.CPU.register () -> (%3582:tensor<[2048], Float32, CPU>[@model.layers.16.post_attention_layernorm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=653), symbol:model.layers.16.post_attention_layernorm.weight])[symbol:model.layers.16.post_attention_layernorm.weight] + tensor.CPU.register () -> (%6009:tensor<[6144, 2048], Float32, CPU>[@model.layers.16.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=654), symbol:model.layers.16.mlp.up_proj.weight])[symbol:model.layers.16.mlp.up_proj.weight] + tensor.CPU.register () -> (%2546:tensor<[6144, 2048], Float32, CPU>[@model.layers.16.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 
32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=656), symbol:model.layers.16.mlp.gate_proj.weight])[symbol:model.layers.16.mlp.gate_proj.weight] + tensor.CPU.register () -> (%3430:tensor<[2048, 6144], Float32, CPU>[@model.layers.16.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=659), symbol:model.layers.16.mlp.down_proj.weight])[symbol:model.layers.16.mlp.down_proj.weight] + tensor.CPU.register () -> (%4318:tensor<[2048], Float32, CPU>[@model.layers.17.input_layernorm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=662), symbol:model.layers.17.input_layernorm.weight])[symbol:model.layers.17.input_layernorm.weight] + tensor.CPU.register () -> (%5713:tensor<[2048, 2048], Float32, CPU>[@model.layers.17.self_attn.q_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=663), symbol:model.layers.17.self_attn.q_proj.weight])[symbol:model.layers.17.self_attn.q_proj.weight] + tensor.CPU.register () -> (%5811:tensor<[1024, 2048], Float32, CPU>[@model.layers.17.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=665), symbol:model.layers.17.self_attn.k_proj.weight])[symbol:model.layers.17.self_attn.k_proj.weight] + tensor.CPU.register () -> (%4106:tensor<[1024, 2048], Float32, CPU>[@model.layers.17.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=667), 
symbol:model.layers.17.self_attn.v_proj.weight])[symbol:model.layers.17.self_attn.v_proj.weight] + tensor.CPU.register () -> (%6494:tensor<[128], Float32, CPU>[@model.layers.17.self_attn.q_norm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=670), symbol:model.layers.17.self_attn.q_norm.weight])[symbol:model.layers.17.self_attn.q_norm.weight] + tensor.CPU.register () -> (%7738:tensor<[128], Float32, CPU>[@model.layers.17.self_attn.k_norm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=672), symbol:model.layers.17.self_attn.k_norm.weight])[symbol:model.layers.17.self_attn.k_norm.weight] + tensor.CPU.register () -> (%7459:tensor<[2048, 2048], Float32, CPU>[@model.layers.17.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=685), symbol:model.layers.17.self_attn.o_proj.weight])[symbol:model.layers.17.self_attn.o_proj.weight] + tensor.CPU.register () -> (%855:tensor<[2048], Float32, CPU>[@model.layers.17.post_attention_layernorm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=688), symbol:model.layers.17.post_attention_layernorm.weight])[symbol:model.layers.17.post_attention_layernorm.weight] + tensor.CPU.register () -> (%8058:tensor<[6144, 2048], Float32, CPU>[@model.layers.17.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=689), symbol:model.layers.17.mlp.up_proj.weight])[symbol:model.layers.17.mlp.up_proj.weight] + tensor.CPU.register () -> (%6964:tensor<[6144, 2048], Float32, 
CPU>[@model.layers.17.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=691), symbol:model.layers.17.mlp.gate_proj.weight])[symbol:model.layers.17.mlp.gate_proj.weight] + tensor.CPU.register () -> (%2577:tensor<[2048, 6144], Float32, CPU>[@model.layers.17.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=694), symbol:model.layers.17.mlp.down_proj.weight])[symbol:model.layers.17.mlp.down_proj.weight] + tensor.CPU.register () -> (%3926:tensor<[2048], Float32, CPU>[@model.layers.18.input_layernorm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=697), symbol:model.layers.18.input_layernorm.weight])[symbol:model.layers.18.input_layernorm.weight] + tensor.CPU.register () -> (%1917:tensor<[2048, 2048], Float32, CPU>[@model.layers.18.self_attn.q_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=698), symbol:model.layers.18.self_attn.q_proj.weight])[symbol:model.layers.18.self_attn.q_proj.weight] + tensor.CPU.register () -> (%1580:tensor<[1024, 2048], Float32, CPU>[@model.layers.18.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=700), symbol:model.layers.18.self_attn.k_proj.weight])[symbol:model.layers.18.self_attn.k_proj.weight] + tensor.CPU.register () -> (%4657:tensor<[1024, 2048], Float32, CPU>[@model.layers.18.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, 
scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=702), symbol:model.layers.18.self_attn.v_proj.weight])[symbol:model.layers.18.self_attn.v_proj.weight] + tensor.CPU.register () -> (%5451:tensor<[128], Float32, CPU>[@model.layers.18.self_attn.q_norm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=705), symbol:model.layers.18.self_attn.q_norm.weight])[symbol:model.layers.18.self_attn.q_norm.weight] + tensor.CPU.register () -> (%3229:tensor<[128], Float32, CPU>[@model.layers.18.self_attn.k_norm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=707), symbol:model.layers.18.self_attn.k_norm.weight])[symbol:model.layers.18.self_attn.k_norm.weight] + tensor.CPU.register () -> (%1514:tensor<[2048, 2048], Float32, CPU>[@model.layers.18.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=720), symbol:model.layers.18.self_attn.o_proj.weight])[symbol:model.layers.18.self_attn.o_proj.weight] + tensor.CPU.register () -> (%910:tensor<[2048], Float32, CPU>[@model.layers.18.post_attention_layernorm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=723), symbol:model.layers.18.post_attention_layernorm.weight])[symbol:model.layers.18.post_attention_layernorm.weight] + tensor.CPU.register () -> (%2694:tensor<[6144, 2048], Float32, CPU>[@model.layers.18.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=724), 
symbol:model.layers.18.mlp.up_proj.weight])[symbol:model.layers.18.mlp.up_proj.weight] + tensor.CPU.register () -> (%4440:tensor<[6144, 2048], Float32, CPU>[@model.layers.18.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=726), symbol:model.layers.18.mlp.gate_proj.weight])[symbol:model.layers.18.mlp.gate_proj.weight] + tensor.CPU.register () -> (%6785:tensor<[2048, 6144], Float32, CPU>[@model.layers.18.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=729), symbol:model.layers.18.mlp.down_proj.weight])[symbol:model.layers.18.mlp.down_proj.weight] + tensor.CPU.register () -> (%5637:tensor<[2048], Float32, CPU>[@model.layers.19.input_layernorm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=732), symbol:model.layers.19.input_layernorm.weight])[symbol:model.layers.19.input_layernorm.weight] + tensor.CPU.register () -> (%542:tensor<[2048, 2048], Float32, CPU>[@model.layers.19.self_attn.q_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=733), symbol:model.layers.19.self_attn.q_proj.weight])[symbol:model.layers.19.self_attn.q_proj.weight] + tensor.CPU.register () -> (%6845:tensor<[1024, 2048], Float32, CPU>[@model.layers.19.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=735), symbol:model.layers.19.self_attn.k_proj.weight])[symbol:model.layers.19.self_attn.k_proj.weight] + tensor.CPU.register () -> (%6082:tensor<[1024, 2048], Float32, 
CPU>[@model.layers.19.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=737), symbol:model.layers.19.self_attn.v_proj.weight])[symbol:model.layers.19.self_attn.v_proj.weight] + tensor.CPU.register () -> (%6718:tensor<[128], Float32, CPU>[@model.layers.19.self_attn.q_norm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=740), symbol:model.layers.19.self_attn.q_norm.weight])[symbol:model.layers.19.self_attn.q_norm.weight] + tensor.CPU.register () -> (%1204:tensor<[128], Float32, CPU>[@model.layers.19.self_attn.k_norm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=742), symbol:model.layers.19.self_attn.k_norm.weight])[symbol:model.layers.19.self_attn.k_norm.weight] + tensor.CPU.register () -> (%7572:tensor<[2048, 2048], Float32, CPU>[@model.layers.19.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=755), symbol:model.layers.19.self_attn.o_proj.weight])[symbol:model.layers.19.self_attn.o_proj.weight] + tensor.CPU.register () -> (%3257:tensor<[2048], Float32, CPU>[@model.layers.19.post_attention_layernorm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=758), symbol:model.layers.19.post_attention_layernorm.weight])[symbol:model.layers.19.post_attention_layernorm.weight] + tensor.CPU.register () -> (%6762:tensor<[6144, 2048], Float32, CPU>[@model.layers.19.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, 
quant_to_type: UInt4, scale_1_type: Float32), uuid=759), symbol:model.layers.19.mlp.up_proj.weight])[symbol:model.layers.19.mlp.up_proj.weight] + tensor.CPU.register () -> (%3095:tensor<[6144, 2048], Float32, CPU>[@model.layers.19.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=761), symbol:model.layers.19.mlp.gate_proj.weight])[symbol:model.layers.19.mlp.gate_proj.weight] + tensor.CPU.register () -> (%3251:tensor<[2048, 6144], Float32, CPU>[@model.layers.19.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=764), symbol:model.layers.19.mlp.down_proj.weight])[symbol:model.layers.19.mlp.down_proj.weight] + tensor.CPU.register () -> (%2201:tensor<[2048], Float32, CPU>[@model.layers.20.input_layernorm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=767), symbol:model.layers.20.input_layernorm.weight])[symbol:model.layers.20.input_layernorm.weight] + tensor.CPU.register () -> (%196:tensor<[2048, 2048], Float32, CPU>[@model.layers.20.self_attn.q_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=768), symbol:model.layers.20.self_attn.q_proj.weight])[symbol:model.layers.20.self_attn.q_proj.weight] + tensor.CPU.register () -> (%179:tensor<[1024, 2048], Float32, CPU>[@model.layers.20.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=770), symbol:model.layers.20.self_attn.k_proj.weight])[symbol:model.layers.20.self_attn.k_proj.weight] + 
tensor.CPU.register () -> (%3406:tensor<[1024, 2048], Float32, CPU>[@model.layers.20.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=772), symbol:model.layers.20.self_attn.v_proj.weight])[symbol:model.layers.20.self_attn.v_proj.weight] + tensor.CPU.register () -> (%760:tensor<[128], Float32, CPU>[@model.layers.20.self_attn.q_norm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=775), symbol:model.layers.20.self_attn.q_norm.weight])[symbol:model.layers.20.self_attn.q_norm.weight] + tensor.CPU.register () -> (%2753:tensor<[128], Float32, CPU>[@model.layers.20.self_attn.k_norm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=777), symbol:model.layers.20.self_attn.k_norm.weight])[symbol:model.layers.20.self_attn.k_norm.weight] + tensor.CPU.register () -> (%5869:tensor<[2048, 2048], Float32, CPU>[@model.layers.20.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=790), symbol:model.layers.20.self_attn.o_proj.weight])[symbol:model.layers.20.self_attn.o_proj.weight] + tensor.CPU.register () -> (%771:tensor<[2048], Float32, CPU>[@model.layers.20.post_attention_layernorm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=793), symbol:model.layers.20.post_attention_layernorm.weight])[symbol:model.layers.20.post_attention_layernorm.weight] + tensor.CPU.register () -> (%2006:tensor<[6144, 2048], Float32, CPU>[@model.layers.20.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, 
block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=794), symbol:model.layers.20.mlp.up_proj.weight])[symbol:model.layers.20.mlp.up_proj.weight] + tensor.CPU.register () -> (%6525:tensor<[6144, 2048], Float32, CPU>[@model.layers.20.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=796), symbol:model.layers.20.mlp.gate_proj.weight])[symbol:model.layers.20.mlp.gate_proj.weight] + tensor.CPU.register () -> (%6967:tensor<[2048, 6144], Float32, CPU>[@model.layers.20.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=799), symbol:model.layers.20.mlp.down_proj.weight])[symbol:model.layers.20.mlp.down_proj.weight] + tensor.CPU.register () -> (%4395:tensor<[2048], Float32, CPU>[@model.layers.21.input_layernorm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=802), symbol:model.layers.21.input_layernorm.weight])[symbol:model.layers.21.input_layernorm.weight] + tensor.CPU.register () -> (%4630:tensor<[2048, 2048], Float32, CPU>[@model.layers.21.self_attn.q_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=803), symbol:model.layers.21.self_attn.q_proj.weight])[symbol:model.layers.21.self_attn.q_proj.weight] + tensor.CPU.register () -> (%4948:tensor<[1024, 2048], Float32, CPU>[@model.layers.21.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=805), 
symbol:model.layers.21.self_attn.k_proj.weight])[symbol:model.layers.21.self_attn.k_proj.weight] + tensor.CPU.register () -> (%5162:tensor<[1024, 2048], Float32, CPU>[@model.layers.21.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=807), symbol:model.layers.21.self_attn.v_proj.weight])[symbol:model.layers.21.self_attn.v_proj.weight] + tensor.CPU.register () -> (%7535:tensor<[128], Float32, CPU>[@model.layers.21.self_attn.q_norm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=810), symbol:model.layers.21.self_attn.q_norm.weight])[symbol:model.layers.21.self_attn.q_norm.weight] + tensor.CPU.register () -> (%1698:tensor<[128], Float32, CPU>[@model.layers.21.self_attn.k_norm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=812), symbol:model.layers.21.self_attn.k_norm.weight])[symbol:model.layers.21.self_attn.k_norm.weight] + tensor.CPU.register () -> (%4030:tensor<[2048, 2048], Float32, CPU>[@model.layers.21.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=825), symbol:model.layers.21.self_attn.o_proj.weight])[symbol:model.layers.21.self_attn.o_proj.weight] + tensor.CPU.register () -> (%3010:tensor<[2048], Float32, CPU>[@model.layers.21.post_attention_layernorm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=828), symbol:model.layers.21.post_attention_layernorm.weight])[symbol:model.layers.21.post_attention_layernorm.weight] + tensor.CPU.register () -> (%5608:tensor<[6144, 2048], Float32, 
CPU>[@model.layers.21.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=829), symbol:model.layers.21.mlp.up_proj.weight])[symbol:model.layers.21.mlp.up_proj.weight] + tensor.CPU.register () -> (%4800:tensor<[6144, 2048], Float32, CPU>[@model.layers.21.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=831), symbol:model.layers.21.mlp.gate_proj.weight])[symbol:model.layers.21.mlp.gate_proj.weight] + tensor.CPU.register () -> (%3518:tensor<[2048, 6144], Float32, CPU>[@model.layers.21.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=834), symbol:model.layers.21.mlp.down_proj.weight])[symbol:model.layers.21.mlp.down_proj.weight] + tensor.CPU.register () -> (%5381:tensor<[2048], Float32, CPU>[@model.layers.22.input_layernorm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=837), symbol:model.layers.22.input_layernorm.weight])[symbol:model.layers.22.input_layernorm.weight] + tensor.CPU.register () -> (%956:tensor<[2048, 2048], Float32, CPU>[@model.layers.22.self_attn.q_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=838), symbol:model.layers.22.self_attn.q_proj.weight])[symbol:model.layers.22.self_attn.q_proj.weight] + tensor.CPU.register () -> (%4159:tensor<[1024, 2048], Float32, CPU>[@model.layers.22.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, 
quant_to_type: UInt4, scale_1_type: Float32), uuid=840), symbol:model.layers.22.self_attn.k_proj.weight])[symbol:model.layers.22.self_attn.k_proj.weight] + tensor.CPU.register () -> (%6713:tensor<[1024, 2048], Float32, CPU>[@model.layers.22.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=842), symbol:model.layers.22.self_attn.v_proj.weight])[symbol:model.layers.22.self_attn.v_proj.weight] + tensor.CPU.register () -> (%1181:tensor<[128], Float32, CPU>[@model.layers.22.self_attn.q_norm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=845), symbol:model.layers.22.self_attn.q_norm.weight])[symbol:model.layers.22.self_attn.q_norm.weight] + tensor.CPU.register () -> (%3001:tensor<[128], Float32, CPU>[@model.layers.22.self_attn.k_norm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=847), symbol:model.layers.22.self_attn.k_norm.weight])[symbol:model.layers.22.self_attn.k_norm.weight] + tensor.CPU.register () -> (%8084:tensor<[2048, 2048], Float32, CPU>[@model.layers.22.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=860), symbol:model.layers.22.self_attn.o_proj.weight])[symbol:model.layers.22.self_attn.o_proj.weight] + tensor.CPU.register () -> (%357:tensor<[2048], Float32, CPU>[@model.layers.22.post_attention_layernorm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=863), symbol:model.layers.22.post_attention_layernorm.weight])[symbol:model.layers.22.post_attention_layernorm.weight] + 
tensor.CPU.register () -> (%1068:tensor<[6144, 2048], Float32, CPU>[@model.layers.22.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=864), symbol:model.layers.22.mlp.up_proj.weight])[symbol:model.layers.22.mlp.up_proj.weight] + tensor.CPU.register () -> (%5057:tensor<[6144, 2048], Float32, CPU>[@model.layers.22.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=866), symbol:model.layers.22.mlp.gate_proj.weight])[symbol:model.layers.22.mlp.gate_proj.weight] + tensor.CPU.register () -> (%698:tensor<[2048, 6144], Float32, CPU>[@model.layers.22.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=869), symbol:model.layers.22.mlp.down_proj.weight])[symbol:model.layers.22.mlp.down_proj.weight] + tensor.CPU.register () -> (%456:tensor<[2048], Float32, CPU>[@model.layers.23.input_layernorm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=872), symbol:model.layers.23.input_layernorm.weight])[symbol:model.layers.23.input_layernorm.weight] + tensor.CPU.register () -> (%5941:tensor<[2048, 2048], Float32, CPU>[@model.layers.23.self_attn.q_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=873), symbol:model.layers.23.self_attn.q_proj.weight])[symbol:model.layers.23.self_attn.q_proj.weight] + tensor.CPU.register () -> (%4304:tensor<[1024, 2048], Float32, CPU>[@model.layers.23.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, 
block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=875), symbol:model.layers.23.self_attn.k_proj.weight])[symbol:model.layers.23.self_attn.k_proj.weight] + tensor.CPU.register () -> (%3738:tensor<[1024, 2048], Float32, CPU>[@model.layers.23.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=877), symbol:model.layers.23.self_attn.v_proj.weight])[symbol:model.layers.23.self_attn.v_proj.weight] + tensor.CPU.register () -> (%5237:tensor<[128], Float32, CPU>[@model.layers.23.self_attn.q_norm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=880), symbol:model.layers.23.self_attn.q_norm.weight])[symbol:model.layers.23.self_attn.q_norm.weight] + tensor.CPU.register () -> (%280:tensor<[128], Float32, CPU>[@model.layers.23.self_attn.k_norm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=882), symbol:model.layers.23.self_attn.k_norm.weight])[symbol:model.layers.23.self_attn.k_norm.weight] + tensor.CPU.register () -> (%7541:tensor<[2048, 2048], Float32, CPU>[@model.layers.23.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=895), symbol:model.layers.23.self_attn.o_proj.weight])[symbol:model.layers.23.self_attn.o_proj.weight] + tensor.CPU.register () -> (%2827:tensor<[2048], Float32, CPU>[@model.layers.23.post_attention_layernorm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=898), 
symbol:model.layers.23.post_attention_layernorm.weight])[symbol:model.layers.23.post_attention_layernorm.weight] + tensor.CPU.register () -> (%2427:tensor<[6144, 2048], Float32, CPU>[@model.layers.23.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=899), symbol:model.layers.23.mlp.up_proj.weight])[symbol:model.layers.23.mlp.up_proj.weight] + tensor.CPU.register () -> (%5935:tensor<[6144, 2048], Float32, CPU>[@model.layers.23.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=901), symbol:model.layers.23.mlp.gate_proj.weight])[symbol:model.layers.23.mlp.gate_proj.weight] + tensor.CPU.register () -> (%11:tensor<[2048, 6144], Float32, CPU>[@model.layers.23.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=904), symbol:model.layers.23.mlp.down_proj.weight])[symbol:model.layers.23.mlp.down_proj.weight] + tensor.CPU.register () -> (%4063:tensor<[2048], Float32, CPU>[@model.layers.24.input_layernorm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=907), symbol:model.layers.24.input_layernorm.weight])[symbol:model.layers.24.input_layernorm.weight] + tensor.CPU.register () -> (%1741:tensor<[2048, 2048], Float32, CPU>[@model.layers.24.self_attn.q_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=908), symbol:model.layers.24.self_attn.q_proj.weight])[symbol:model.layers.24.self_attn.q_proj.weight] + tensor.CPU.register () -> (%7443:tensor<[1024, 2048], 
Float32, CPU>[@model.layers.24.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=910), symbol:model.layers.24.self_attn.k_proj.weight])[symbol:model.layers.24.self_attn.k_proj.weight] + tensor.CPU.register () -> (%3162:tensor<[1024, 2048], Float32, CPU>[@model.layers.24.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=912), symbol:model.layers.24.self_attn.v_proj.weight])[symbol:model.layers.24.self_attn.v_proj.weight] + tensor.CPU.register () -> (%5942:tensor<[128], Float32, CPU>[@model.layers.24.self_attn.q_norm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=915), symbol:model.layers.24.self_attn.q_norm.weight])[symbol:model.layers.24.self_attn.q_norm.weight] + tensor.CPU.register () -> (%1980:tensor<[128], Float32, CPU>[@model.layers.24.self_attn.k_norm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=917), symbol:model.layers.24.self_attn.k_norm.weight])[symbol:model.layers.24.self_attn.k_norm.weight] + tensor.CPU.register () -> (%5547:tensor<[2048, 2048], Float32, CPU>[@model.layers.24.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=930), symbol:model.layers.24.self_attn.o_proj.weight])[symbol:model.layers.24.self_attn.o_proj.weight] + tensor.CPU.register () -> (%5937:tensor<[2048], Float32, CPU>[@model.layers.24.post_attention_layernorm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, 
scale_type: Float32, zero_point_type: Int32), uuid=933), symbol:model.layers.24.post_attention_layernorm.weight])[symbol:model.layers.24.post_attention_layernorm.weight] + tensor.CPU.register () -> (%6475:tensor<[6144, 2048], Float32, CPU>[@model.layers.24.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=934), symbol:model.layers.24.mlp.up_proj.weight])[symbol:model.layers.24.mlp.up_proj.weight] + tensor.CPU.register () -> (%7634:tensor<[6144, 2048], Float32, CPU>[@model.layers.24.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=936), symbol:model.layers.24.mlp.gate_proj.weight])[symbol:model.layers.24.mlp.gate_proj.weight] + tensor.CPU.register () -> (%2837:tensor<[2048, 6144], Float32, CPU>[@model.layers.24.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=939), symbol:model.layers.24.mlp.down_proj.weight])[symbol:model.layers.24.mlp.down_proj.weight] + tensor.CPU.register () -> (%2698:tensor<[2048], Float32, CPU>[@model.layers.25.input_layernorm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=942), symbol:model.layers.25.input_layernorm.weight])[symbol:model.layers.25.input_layernorm.weight] + tensor.CPU.register () -> (%7312:tensor<[2048, 2048], Float32, CPU>[@model.layers.25.self_attn.q_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=943), 
symbol:model.layers.25.self_attn.q_proj.weight])[symbol:model.layers.25.self_attn.q_proj.weight] + tensor.CPU.register () -> (%8046:tensor<[1024, 2048], Float32, CPU>[@model.layers.25.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=945), symbol:model.layers.25.self_attn.k_proj.weight])[symbol:model.layers.25.self_attn.k_proj.weight] + tensor.CPU.register () -> (%8035:tensor<[1024, 2048], Float32, CPU>[@model.layers.25.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=947), symbol:model.layers.25.self_attn.v_proj.weight])[symbol:model.layers.25.self_attn.v_proj.weight] + tensor.CPU.register () -> (%5499:tensor<[128], Float32, CPU>[@model.layers.25.self_attn.q_norm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=950), symbol:model.layers.25.self_attn.q_norm.weight])[symbol:model.layers.25.self_attn.q_norm.weight] + tensor.CPU.register () -> (%3571:tensor<[128], Float32, CPU>[@model.layers.25.self_attn.k_norm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=952), symbol:model.layers.25.self_attn.k_norm.weight])[symbol:model.layers.25.self_attn.k_norm.weight] + tensor.CPU.register () -> (%6118:tensor<[2048, 2048], Float32, CPU>[@model.layers.25.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=965), symbol:model.layers.25.self_attn.o_proj.weight])[symbol:model.layers.25.self_attn.o_proj.weight] + tensor.CPU.register () -> (%3125:tensor<[2048], Float32, 
CPU>[@model.layers.25.post_attention_layernorm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=968), symbol:model.layers.25.post_attention_layernorm.weight])[symbol:model.layers.25.post_attention_layernorm.weight] + tensor.CPU.register () -> (%1187:tensor<[6144, 2048], Float32, CPU>[@model.layers.25.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=969), symbol:model.layers.25.mlp.up_proj.weight])[symbol:model.layers.25.mlp.up_proj.weight] + tensor.CPU.register () -> (%327:tensor<[6144, 2048], Float32, CPU>[@model.layers.25.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=971), symbol:model.layers.25.mlp.gate_proj.weight])[symbol:model.layers.25.mlp.gate_proj.weight] + tensor.CPU.register () -> (%1157:tensor<[2048, 6144], Float32, CPU>[@model.layers.25.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=974), symbol:model.layers.25.mlp.down_proj.weight])[symbol:model.layers.25.mlp.down_proj.weight] + tensor.CPU.register () -> (%6051:tensor<[2048], Float32, CPU>[@model.layers.26.input_layernorm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=977), symbol:model.layers.26.input_layernorm.weight])[symbol:model.layers.26.input_layernorm.weight] + tensor.CPU.register () -> (%3763:tensor<[2048, 2048], Float32, CPU>[@model.layers.26.self_attn.q_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, 
quant_to_type: UInt4, scale_1_type: Float32), uuid=978), symbol:model.layers.26.self_attn.q_proj.weight])[symbol:model.layers.26.self_attn.q_proj.weight] + tensor.CPU.register () -> (%6974:tensor<[1024, 2048], Float32, CPU>[@model.layers.26.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=980), symbol:model.layers.26.self_attn.k_proj.weight])[symbol:model.layers.26.self_attn.k_proj.weight] + tensor.CPU.register () -> (%3131:tensor<[1024, 2048], Float32, CPU>[@model.layers.26.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=982), symbol:model.layers.26.self_attn.v_proj.weight])[symbol:model.layers.26.self_attn.v_proj.weight] + tensor.CPU.register () -> (%5543:tensor<[128], Float32, CPU>[@model.layers.26.self_attn.q_norm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=985), symbol:model.layers.26.self_attn.q_norm.weight])[symbol:model.layers.26.self_attn.q_norm.weight] + tensor.CPU.register () -> (%7751:tensor<[128], Float32, CPU>[@model.layers.26.self_attn.k_norm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=987), symbol:model.layers.26.self_attn.k_norm.weight])[symbol:model.layers.26.self_attn.k_norm.weight] + tensor.CPU.register () -> (%4475:tensor<[2048, 2048], Float32, CPU>[@model.layers.26.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1000), symbol:model.layers.26.self_attn.o_proj.weight])[symbol:model.layers.26.self_attn.o_proj.weight] + 
tensor.CPU.register () -> (%7597:tensor<[2048], Float32, CPU>[@model.layers.26.post_attention_layernorm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1003), symbol:model.layers.26.post_attention_layernorm.weight])[symbol:model.layers.26.post_attention_layernorm.weight] + tensor.CPU.register () -> (%3458:tensor<[6144, 2048], Float32, CPU>[@model.layers.26.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1004), symbol:model.layers.26.mlp.up_proj.weight])[symbol:model.layers.26.mlp.up_proj.weight] + tensor.CPU.register () -> (%6097:tensor<[6144, 2048], Float32, CPU>[@model.layers.26.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1006), symbol:model.layers.26.mlp.gate_proj.weight])[symbol:model.layers.26.mlp.gate_proj.weight] + tensor.CPU.register () -> (%1186:tensor<[2048, 6144], Float32, CPU>[@model.layers.26.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1009), symbol:model.layers.26.mlp.down_proj.weight])[symbol:model.layers.26.mlp.down_proj.weight] + tensor.CPU.register () -> (%6869:tensor<[2048], Float32, CPU>[@model.layers.27.input_layernorm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1012), symbol:model.layers.27.input_layernorm.weight])[symbol:model.layers.27.input_layernorm.weight] + tensor.CPU.register () -> (%513:tensor<[2048, 2048], Float32, CPU>[@model.layers.27.self_attn.q_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, 
block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1013), symbol:model.layers.27.self_attn.q_proj.weight])[symbol:model.layers.27.self_attn.q_proj.weight] + tensor.CPU.register () -> (%49:tensor<[1024, 2048], Float32, CPU>[@model.layers.27.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1015), symbol:model.layers.27.self_attn.k_proj.weight])[symbol:model.layers.27.self_attn.k_proj.weight] + tensor.CPU.register () -> (%7169:tensor<[1024, 2048], Float32, CPU>[@model.layers.27.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1017), symbol:model.layers.27.self_attn.v_proj.weight])[symbol:model.layers.27.self_attn.v_proj.weight] + tensor.CPU.register () -> (%6403:tensor<[128], Float32, CPU>[@model.layers.27.self_attn.q_norm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1020), symbol:model.layers.27.self_attn.q_norm.weight])[symbol:model.layers.27.self_attn.q_norm.weight] + tensor.CPU.register () -> (%3420:tensor<[128], Float32, CPU>[@model.layers.27.self_attn.k_norm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1022), symbol:model.layers.27.self_attn.k_norm.weight])[symbol:model.layers.27.self_attn.k_norm.weight] + tensor.CPU.register () -> (%250:tensor<[2048, 2048], Float32, CPU>[@model.layers.27.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1035), 
symbol:model.layers.27.self_attn.o_proj.weight])[symbol:model.layers.27.self_attn.o_proj.weight] + tensor.CPU.register () -> (%2391:tensor<[2048], Float32, CPU>[@model.layers.27.post_attention_layernorm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1038), symbol:model.layers.27.post_attention_layernorm.weight])[symbol:model.layers.27.post_attention_layernorm.weight] + tensor.CPU.register () -> (%3707:tensor<[6144, 2048], Float32, CPU>[@model.layers.27.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1039), symbol:model.layers.27.mlp.up_proj.weight])[symbol:model.layers.27.mlp.up_proj.weight] + tensor.CPU.register () -> (%6283:tensor<[6144, 2048], Float32, CPU>[@model.layers.27.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1041), symbol:model.layers.27.mlp.gate_proj.weight])[symbol:model.layers.27.mlp.gate_proj.weight] + tensor.CPU.register () -> (%2073:tensor<[2048, 6144], Float32, CPU>[@model.layers.27.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1044), symbol:model.layers.27.mlp.down_proj.weight])[symbol:model.layers.27.mlp.down_proj.weight] + tensor.CPU.register () -> (%6469:tensor<[2048], Float32, CPU>[@model.norm.weight][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1047), symbol:model.norm.weight])[symbol:model.norm.weight] + tensor.CPU.register () -> (%2672:tensor<[151936, 2048], Float32, CPU>[@lm_head.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, 
quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1048), symbol:lm_head.weight])[symbol:lm_head.weight] } } graph.SubGraphOp @deinit [symbol:deinit] { @@ -321,1697 +321,2116 @@ } } - graph.CallGraphOp @model (%8013:tensor<[1, 32], Int32, CPU>[quant_recipe:QuantSpec(Raw(type: Int32), uuid=0)], %8071:tensor<[1, 32], Int64, CPU>[quant_recipe:QuantSpec(Raw(type: Int64), uuid=1)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8015:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=3)], %8017:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=4)], %8019:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=5)], %8021:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=6)], %8023:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=7)], %8025:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=8)], %8027:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=9)], %8029:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=10)], %8031:tensor<[1, 8, 128, 992], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=11)], %8033:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=12)], %8035:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=13)], %8037:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=14)], %8039:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=15)], %8041:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=16)], %8043:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=17)], %8045:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=18)], %8047:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=19)], %8049:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=20)], %8051:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=21)], %8053:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, 
quant_to_type: Int8, scale_type: Float32), uuid=22)], %8055:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=23)], %8057:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=24)], %8059:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=25)], %8061:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=26)], %8063:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=27)], %8065:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=28)], %8067:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=29)], %8069:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=30)], %8016:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=31)], %8018:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=32)], %8020:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=33)], %8022:tensor<[1, 8, 992, 128], 
Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=34)], %8024:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=35)], %8026:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=36)], %8028:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=37)], %8030:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=38)], %8032:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=39)], %8034:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=40)], %8036:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=41)], %8038:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=42)], %8040:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=43)], %8042:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=44)], %8044:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 
127, quant_to_type: Int8, scale_type: Float32), uuid=45)], %8046:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=46)], %8048:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=47)], %8050:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=48)], %8052:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=49)], %8054:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=50)], %8056:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=51)], %8058:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=52)], %8060:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=53)], %8062:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=54)], %8064:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=55)], %8066:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=56)], %8068:tensor<[1, 8, 992, 
128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=57)], %8070:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=58)]) -> (%9225:tensor<[1, 32, 151936], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1021)], %8089:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=78)], %8130:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=112)], %8171:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=146)], %8212:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=180)], %8253:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=214)], %8294:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=248)], %8335:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=282)], %8376:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=316)], %8417:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 
-128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=350)], %8458:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=384)], %8499:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=418)], %8540:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=452)], %8581:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=486)], %8622:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=520)], %8663:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=554)], %8704:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=588)], %8745:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=622)], %8786:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=656)], %8827:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=690)], %8868:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=724)], 
%8909:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=758)], %8950:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=792)], %8991:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=826)], %9032:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=860)], %9073:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=894)], %9114:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=928)], %9155:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=962)], %9196:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=996)], %8091:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=80)], %8132:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=114)], %8173:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=148)], %8214:tensor<[1, 8, 32, 128], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=182)], %8255:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=216)], %8296:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=250)], %8337:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=284)], %8378:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=318)], %8419:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=352)], %8460:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=386)], %8501:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=420)], %8542:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=454)], %8583:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=488)], %8624:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=522)], %8665:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, 
quant_to_type: Int8, scale_type: Float32), uuid=556)], %8706:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=590)], %8747:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=624)], %8788:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=658)], %8829:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=692)], %8870:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=726)], %8911:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=760)], %8952:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=794)], %8993:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=828)], %9034:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=862)], %9075:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=896)], %9116:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=930)], %9157:tensor<[1, 8, 32, 128], 
Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=964)], %9198:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=998)]) + graph.CallGraphOp @model (%8206:tensor<[1, 32], Int32, CPU>[quant_recipe:QuantSpec(Raw(type: Int32), uuid=0)], %8264:tensor<[32], Int32, CPU>[quant_recipe:QuantSpec(Raw(type: Int32), uuid=1)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8208:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=3)], %8210:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=4)], %8212:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=5)], %8214:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=6)], %8216:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=7)], %8218:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=8)], %8220:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=9)], %8222:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=10)], 
%8224:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=11)], %8226:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=12)], %8228:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=13)], %8230:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=14)], %8232:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=15)], %8234:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=16)], %8236:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=17)], %8238:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=18)], %8240:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=19)], %8242:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=20)], %8244:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=21)], %8246:tensor<[1, 8, 128, 992], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=22)], %8248:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=23)], %8250:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=24)], %8252:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=25)], %8254:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=26)], %8256:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=27)], %8258:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=28)], %8260:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=29)], %8262:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=30)], %8209:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=31)], %8211:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=32)], %8213:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: 
UInt8, scale_type: Float32), uuid=33)], %8215:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=34)], %8217:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=35)], %8219:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=36)], %8221:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=37)], %8223:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=38)], %8225:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=39)], %8227:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=40)], %8229:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=41)], %8231:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=42)], %8233:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=43)], %8235:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=44)], %8237:tensor<[1, 8, 992, 128], UInt8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=45)], %8239:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=46)], %8241:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=47)], %8243:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=48)], %8245:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=49)], %8247:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=50)], %8249:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=51)], %8251:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=52)], %8253:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=53)], %8255:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=54)], %8257:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=55)], %8259:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, 
scale_type: Float32), uuid=56)], %8261:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=57)], %8263:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=58)]) -> (%9726:tensor<[1, 32, 151936], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1049)], %8291:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=79)], %8343:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=114)], %8395:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=149)], %8447:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=184)], %8499:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=219)], %8551:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=254)], %8603:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=289)], %8655:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=324)], %8707:tensor<[1, 8, 128, 32], 
UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=359)], %8759:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=394)], %8811:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=429)], %8863:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=464)], %8915:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=499)], %8967:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=534)], %9019:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=569)], %9071:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=604)], %9123:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=639)], %9175:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=674)], %9227:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=709)], %9279:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, 
quant_to_type: UInt8, scale_type: Float32), uuid=744)], %9331:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=779)], %9383:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=814)], %9435:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=849)], %9487:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=884)], %9539:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=919)], %9591:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=954)], %9643:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=989)], %9695:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1024)], %8293:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=81)], %8345:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=116)], %8397:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=151)], %8449:tensor<[1, 8, 32, 128], 
UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=186)], %8501:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=221)], %8553:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=256)], %8605:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=291)], %8657:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=326)], %8709:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=361)], %8761:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=396)], %8813:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=431)], %8865:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=466)], %8917:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=501)], %8969:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=536)], %9021:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, 
quant_to_type: UInt8, scale_type: Float32), uuid=571)], %9073:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=606)], %9125:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=641)], %9177:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=676)], %9229:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=711)], %9281:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=746)], %9333:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=781)], %9385:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=816)], %9437:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=851)], %9489:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=886)], %9541:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=921)], %9593:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=956)], %9645:tensor<[1, 8, 32, 128], 
UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=991)], %9697:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1026)]) graph.SubGraphOp @model [using_qnn:true, symbol:model] { - (%8013:tensor<[1, 32], Int32, CPU>[quant_recipe:QuantSpec(Raw(type: Int32), uuid=0)], %8071:tensor<[1, 32], Int64, CPU>[quant_recipe:QuantSpec(Raw(type: Int64), uuid=1)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8015:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=3)], %8017:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=4)], %8019:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=5)], %8021:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=6)], %8023:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=7)], %8025:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=8)], %8027:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=9)], %8029:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, 
scale_type: Float32), uuid=10)], %8031:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=11)], %8033:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=12)], %8035:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=13)], %8037:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=14)], %8039:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=15)], %8041:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=16)], %8043:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=17)], %8045:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=18)], %8047:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=19)], %8049:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=20)], %8051:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=21)], %8053:tensor<[1, 8, 128, 992], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=22)], %8055:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=23)], %8057:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=24)], %8059:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=25)], %8061:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=26)], %8063:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=27)], %8065:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=28)], %8067:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=29)], %8069:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=30)], %8016:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=31)], %8018:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=32)], %8020:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, 
quant_to_type: Int8, scale_type: Float32), uuid=33)], %8022:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=34)], %8024:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=35)], %8026:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=36)], %8028:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=37)], %8030:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=38)], %8032:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=39)], %8034:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=40)], %8036:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=41)], %8038:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=42)], %8040:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=43)], %8042:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=44)], %8044:tensor<[1, 8, 992, 128], 
Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=45)], %8046:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=46)], %8048:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=47)], %8050:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=48)], %8052:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=49)], %8054:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=50)], %8056:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=51)], %8058:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=52)], %8060:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=53)], %8062:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=54)], %8064:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=55)], %8066:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 
127, quant_to_type: Int8, scale_type: Float32), uuid=56)], %8068:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=57)], %8070:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=58)]) -> (%9225:tensor<[1, 32, 151936], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1021)], %8089:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=78)], %8130:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=112)], %8171:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=146)], %8212:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=180)], %8253:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=214)], %8294:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=248)], %8335:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=282)], %8376:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=316)], %8417:tensor<[1, 
8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=350)], %8458:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=384)], %8499:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=418)], %8540:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=452)], %8581:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=486)], %8622:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=520)], %8663:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=554)], %8704:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=588)], %8745:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=622)], %8786:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=656)], %8827:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=690)], %8868:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, 
quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=724)], %8909:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=758)], %8950:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=792)], %8991:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=826)], %9032:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=860)], %9073:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=894)], %9114:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=928)], %9155:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=962)], %9196:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=996)], %8091:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=80)], %8132:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=114)], %8173:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=148)], 
%8214:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=182)], %8255:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=216)], %8296:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=250)], %8337:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=284)], %8378:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=318)], %8419:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=352)], %8460:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=386)], %8501:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=420)], %8542:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=454)], %8583:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=488)], %8624:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=522)], %8665:tensor<[1, 8, 32, 128], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=556)], %8706:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=590)], %8747:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=624)], %8788:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=658)], %8829:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=692)], %8870:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=726)], %8911:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=760)], %8952:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=794)], %8993:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=828)], %9034:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=862)], %9075:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=896)], %9116:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, 
quant_to_type: Int8, scale_type: Float32), uuid=930)], %9157:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=964)], %9198:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=998)]) { - linalg.CPU.EmbeddingOp (%8013:tensor<[1, 32], Int32, CPU>[quant_recipe:QuantSpec(Raw(type: Int32), uuid=0)]) -> (%8072:tensor<[1, 32, 2048], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=59)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float32), uuid=59), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=60), weight_weight:QuantSpec(Raw(type: Float32), uuid=61))] (%8072:tensor<[1, 32, 2048], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=59)]) -> (%8073:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=60)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int64), uuid=1), outputs_0:QuantSpec(Raw(type: Int64), uuid=1), )] (%8071:tensor<[1, 32], Int64, CPU>[quant_recipe:QuantSpec(Raw(type: Int64), uuid=1)]) -> (%8071:tensor<[32], Int64, CPU>[quant_recipe:QuantSpec(Raw(type: Int64), uuid=1)]) - linalg.CPU.IndexOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), )] (%8011:tensor<[1, 1024, 128], Int16PerTensor, CPU>[@rope_sin][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), 
symbol:rope_sin]) -> (%8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)]) - linalg.CPU.IndexOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), )] (%8012:tensor<[1, 1024, 128], Int16PerTensor, CPU>[@rope_cos][quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), symbol:rope_cos]) -> (%8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) - graph.CallGraphOp @model.layers.0 (%8073:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=60)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8015:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=3)], %8016:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=31)]) -> (%8116:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 
-32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=99)], %8089:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=78)], %8091:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=80)]) - graph.CallGraphOp @model.layers.1 (%8116:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=99)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8017:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=4)], %8018:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=32)]) -> (%8157:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=133)], %8130:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=112)], %8132:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=114)]) - graph.CallGraphOp 
@model.layers.2 (%8157:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=133)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8019:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=5)], %8020:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=33)]) -> (%8198:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=167)], %8171:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=146)], %8173:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=148)]) - graph.CallGraphOp @model.layers.3 (%8198:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=167)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8021:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=6)], %8022:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=34)]) -> (%8239:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=201)], %8212:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=180)], %8214:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=182)]) - graph.CallGraphOp @model.layers.4 (%8239:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=201)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8023:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=7)], %8024:tensor<[1, 8, 992, 128], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=35)]) -> (%8280:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=235)], %8253:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=214)], %8255:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=216)]) - graph.CallGraphOp @model.layers.5 (%8280:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=235)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8025:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=8)], %8026:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=36)]) -> (%8321:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=269)], %8294:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, 
scale_type: Float32), uuid=248)], %8296:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=250)]) - graph.CallGraphOp @model.layers.6 (%8321:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=269)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8027:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=9)], %8028:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=37)]) -> (%8362:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=303)], %8335:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=282)], %8337:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=284)]) - graph.CallGraphOp @model.layers.7 (%8362:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=303)], %8074:tensor<[1, 32, 128], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8029:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=10)], %8030:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=38)]) -> (%8403:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=337)], %8376:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=316)], %8378:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=318)]) - graph.CallGraphOp @model.layers.8 (%8403:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=337)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8031:tensor<[1, 8, 128, 992], 
Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=11)], %8032:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=39)]) -> (%8444:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=371)], %8417:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=350)], %8419:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=352)]) - graph.CallGraphOp @model.layers.9 (%8444:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=371)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8033:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=12)], %8034:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=40)]) -> (%8485:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: 
Int16, scale_type: Float32), uuid=405)], %8458:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=384)], %8460:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=386)]) - graph.CallGraphOp @model.layers.10 (%8485:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=405)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8035:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=13)], %8036:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=41)]) -> (%8526:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=439)], %8499:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=418)], %8501:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=420)]) - graph.CallGraphOp @model.layers.11 (%8526:tensor<[1, 32, 2048], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=439)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8037:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=14)], %8038:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=42)]) -> (%8567:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=473)], %8540:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=452)], %8542:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=454)]) - graph.CallGraphOp @model.layers.12 (%8567:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=473)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8039:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=15)], %8040:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=43)]) -> (%8608:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=507)], %8581:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=486)], %8583:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=488)]) - graph.CallGraphOp @model.layers.13 (%8608:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=507)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8041:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=16)], %8042:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, 
quant_to_type: Int8, scale_type: Float32), uuid=44)]) -> (%8649:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=541)], %8622:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=520)], %8624:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=522)]) - graph.CallGraphOp @model.layers.14 (%8649:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=541)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8043:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=17)], %8044:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=45)]) -> (%8690:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=575)], %8663:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=554)], %8665:tensor<[1, 8, 32, 128], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=556)]) - graph.CallGraphOp @model.layers.15 (%8690:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=575)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8045:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=18)], %8046:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=46)]) -> (%8731:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=609)], %8704:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=588)], %8706:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=590)]) - graph.CallGraphOp @model.layers.16 (%8731:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=609)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8047:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=19)], %8048:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=47)]) -> (%8772:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=643)], %8745:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=622)], %8747:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=624)]) - graph.CallGraphOp @model.layers.17 (%8772:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=643)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8049:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, 
quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=20)], %8050:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=48)]) -> (%8813:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=677)], %8786:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=656)], %8788:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=658)]) - graph.CallGraphOp @model.layers.18 (%8813:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=677)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8051:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=21)], %8052:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=49)]) -> (%8854:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=711)], %8827:tensor<[1, 8, 128, 32], 
Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=690)], %8829:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=692)]) - graph.CallGraphOp @model.layers.19 (%8854:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=711)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8053:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=22)], %8054:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=50)]) -> (%8895:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=745)], %8868:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=724)], %8870:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=726)]) - graph.CallGraphOp @model.layers.20 (%8895:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 
-32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=745)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8055:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=23)], %8056:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=51)]) -> (%8936:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=779)], %8909:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=758)], %8911:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=760)]) - graph.CallGraphOp @model.layers.21 (%8936:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=779)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 
1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8057:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=24)], %8058:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=52)]) -> (%8977:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=813)], %8950:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=792)], %8952:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=794)]) - graph.CallGraphOp @model.layers.22 (%8977:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=813)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8059:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=25)], %8060:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=53)]) -> (%9018:tensor<[1, 32, 2048], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=847)], %8991:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=826)], %8993:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=828)]) - graph.CallGraphOp @model.layers.23 (%9018:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=847)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8061:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=26)], %8062:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=54)]) -> (%9059:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=881)], %9032:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=860)], %9034:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: 
Int8, scale_type: Float32), uuid=862)]) - graph.CallGraphOp @model.layers.24 (%9059:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=881)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8063:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=27)], %8064:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=55)]) -> (%9100:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=915)], %9073:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=894)], %9075:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=896)]) - graph.CallGraphOp @model.layers.25 (%9100:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=915)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 
128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8065:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=28)], %8066:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=56)]) -> (%9141:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=949)], %9114:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=928)], %9116:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=930)]) - graph.CallGraphOp @model.layers.26 (%9141:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=949)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8067:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=29)], %8068:tensor<[1, 8, 992, 128], 
Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=57)]) -> (%9182:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=983)], %9155:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=962)], %9157:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=964)]) - graph.CallGraphOp @model.layers.27 (%9182:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=983)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8069:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=30)], %8070:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=58)]) -> (%9223:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1017)], %9196:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, 
quant_to_type: Int8, scale_type: Float32), uuid=996)], %9198:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=998)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1017), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1018), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1019))] (%9223:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1017)]) -> (%9224:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1018)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1018), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1021), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1020)), using_qnn:true] (%9224:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1018)]) -> (%9225:tensor<[1, 32, 151936], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1021)]) - cf.ReturnOp (%9225:tensor<[1, 32, 151936], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 
-32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1021)], %8089:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=78)], %8130:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=112)], %8171:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=146)], %8212:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=180)], %8253:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=214)], %8294:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=248)], %8335:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=282)], %8376:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=316)], %8417:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=350)], %8458:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=384)], %8499:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=418)], 
%8540:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=452)], %8581:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=486)], %8622:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=520)], %8663:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=554)], %8704:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=588)], %8745:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=622)], %8786:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=656)], %8827:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=690)], %8868:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=724)], %8909:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=758)], %8950:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=792)], %8991:tensor<[1, 8, 128, 32], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=826)], %9032:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=860)], %9073:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=894)], %9114:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=928)], %9155:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=962)], %9196:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=996)], %8091:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=80)], %8132:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=114)], %8173:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=148)], %8214:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=182)], %8255:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=216)], %8296:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, 
quant_to_type: Int8, scale_type: Float32), uuid=250)], %8337:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=284)], %8378:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=318)], %8419:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=352)], %8460:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=386)], %8501:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=420)], %8542:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=454)], %8583:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=488)], %8624:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=522)], %8665:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=556)], %8706:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=590)], %8747:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=624)], %8788:tensor<[1, 8, 32, 128], 
Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=658)], %8829:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=692)], %8870:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=726)], %8911:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=760)], %8952:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=794)], %8993:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=828)], %9034:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=862)], %9075:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=896)], %9116:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=930)], %9157:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=964)], %9198:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=998)]) -> () + (%8206:tensor<[1, 32], Int32, CPU>[quant_recipe:QuantSpec(Raw(type: Int32), uuid=0)], %8264:tensor<[32], 
Int32, CPU>[quant_recipe:QuantSpec(Raw(type: Int32), uuid=1)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8208:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=3)], %8210:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=4)], %8212:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=5)], %8214:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=6)], %8216:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=7)], %8218:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=8)], %8220:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=9)], %8222:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=10)], %8224:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=11)], %8226:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=12)], %8228:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, 
quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=13)], %8230:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=14)], %8232:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=15)], %8234:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=16)], %8236:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=17)], %8238:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=18)], %8240:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=19)], %8242:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=20)], %8244:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=21)], %8246:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=22)], %8248:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=23)], %8250:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=24)], 
%8252:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=25)], %8254:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=26)], %8256:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=27)], %8258:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=28)], %8260:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=29)], %8262:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=30)], %8209:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=31)], %8211:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=32)], %8213:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=33)], %8215:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=34)], %8217:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=35)], %8219:tensor<[1, 8, 992, 128], UInt8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=36)], %8221:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=37)], %8223:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=38)], %8225:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=39)], %8227:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=40)], %8229:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=41)], %8231:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=42)], %8233:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=43)], %8235:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=44)], %8237:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=45)], %8239:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=46)], %8241:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, 
scale_type: Float32), uuid=47)], %8243:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=48)], %8245:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=49)], %8247:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=50)], %8249:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=51)], %8251:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=52)], %8253:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=53)], %8255:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=54)], %8257:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=55)], %8259:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=56)], %8261:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=57)], %8263:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=58)]) -> (%9726:tensor<[1, 32, 151936], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1049)], %8291:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=79)], %8343:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=114)], %8395:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=149)], %8447:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=184)], %8499:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=219)], %8551:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=254)], %8603:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=289)], %8655:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=324)], %8707:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=359)], %8759:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=394)], %8811:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 
255, quant_to_type: UInt8, scale_type: Float32), uuid=429)], %8863:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=464)], %8915:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=499)], %8967:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=534)], %9019:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=569)], %9071:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=604)], %9123:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=639)], %9175:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=674)], %9227:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=709)], %9279:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=744)], %9331:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=779)], %9383:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=814)], %9435:tensor<[1, 8, 128, 32], 
UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=849)], %9487:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=884)], %9539:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=919)], %9591:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=954)], %9643:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=989)], %9695:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1024)], %8293:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=81)], %8345:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=116)], %8397:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=151)], %8449:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=186)], %8501:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=221)], %8553:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, 
quant_to_type: UInt8, scale_type: Float32), uuid=256)], %8605:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=291)], %8657:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=326)], %8709:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=361)], %8761:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=396)], %8813:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=431)], %8865:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=466)], %8917:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=501)], %8969:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=536)], %9021:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=571)], %9073:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=606)], %9125:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=641)], %9177:tensor<[1, 8, 32, 128], 
UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=676)], %9229:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=711)], %9281:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=746)], %9333:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=781)], %9385:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=816)], %9437:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=851)], %9489:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=886)], %9541:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=921)], %9593:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=956)], %9645:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=991)], %9697:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1026)]) { + linalg.CPU.EmbeddingOp (%8206:tensor<[1, 32], Int32, CPU>[quant_recipe:QuantSpec(Raw(type: Int32), uuid=0)]) -> 
(%8265:tensor<[1, 32, 2048], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=59)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float32), uuid=59, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), weight_weight:QuantSpec(Raw(type: Float32), uuid=61, solved=0))] (%8265:tensor<[1, 32, 2048], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=59)]) -> (%8266:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + linalg.CPU.IndexOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=62, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), )] (%8204:tensor<[1024, 5, 128], UInt16PerTensor, CPU>[@rope_sin][quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=62), symbol:rope_sin]) -> (%8267:tensor<[1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) + linalg.CPU.IndexOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=64, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), )] (%8205:tensor<[1024, 5, 128], UInt16PerTensor, CPU>[@rope_cos][quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=64), symbol:rope_cos]) -> (%8268:tensor<[1, 32, 128], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) + graph.CallGraphOp @model.layers.0 (%8266:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8267:tensor<[1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8208:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=3)], %8209:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=31)]) -> (%8320:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8291:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=79)], %8293:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=81)]) + graph.CallGraphOp @model.layers.1 (%8320:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8210:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=4)], %8211:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=32)]) -> (%8372:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8343:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=114)], %8345:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=116)]) + graph.CallGraphOp @model.layers.2 (%8372:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8212:tensor<[1, 8, 
128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=5)], %8213:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=33)]) -> (%8424:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8395:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=149)], %8397:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=151)]) + graph.CallGraphOp @model.layers.3 (%8424:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8214:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=6)], %8215:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=34)]) -> (%8476:tensor<[1, 32, 2048], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8447:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=184)], %8449:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=186)]) + graph.CallGraphOp @model.layers.4 (%8476:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8216:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=7)], %8217:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=35)]) -> (%8528:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8499:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=219)], %8501:tensor<[1, 8, 32, 128], UInt8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=221)]) + graph.CallGraphOp @model.layers.5 (%8528:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8218:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=8)], %8219:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=36)]) -> (%8580:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8551:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=254)], %8553:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=256)]) + graph.CallGraphOp @model.layers.6 (%8580:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8220:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=9)], %8221:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=37)]) -> (%8632:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8603:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=289)], %8605:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=291)]) + graph.CallGraphOp @model.layers.7 (%8632:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8222:tensor<[1, 8, 
128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=10)], %8223:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=38)]) -> (%8684:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8655:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=324)], %8657:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=326)]) + graph.CallGraphOp @model.layers.8 (%8684:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8224:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=11)], %8225:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=39)]) -> (%8736:tensor<[1, 32, 2048], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8707:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=359)], %8709:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=361)]) + graph.CallGraphOp @model.layers.9 (%8736:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8226:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=12)], %8227:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=40)]) -> (%8788:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8759:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=394)], %8761:tensor<[1, 8, 32, 128], UInt8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=396)]) + graph.CallGraphOp @model.layers.10 (%8788:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8228:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=13)], %8229:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=41)]) -> (%8840:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8811:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=429)], %8813:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=431)]) + graph.CallGraphOp @model.layers.11 (%8840:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8230:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=14)], %8231:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=42)]) -> (%8892:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8863:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=464)], %8865:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=466)]) + graph.CallGraphOp @model.layers.12 (%8892:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8232:tensor<[1, 
8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=15)], %8233:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=43)]) -> (%8944:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8915:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=499)], %8917:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=501)]) + graph.CallGraphOp @model.layers.13 (%8944:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8234:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=16)], %8235:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=44)]) -> (%8996:tensor<[1, 32, 2048], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8967:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=534)], %8969:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=536)]) + graph.CallGraphOp @model.layers.14 (%8996:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8236:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=17)], %8237:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=45)]) -> (%9048:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %9019:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=569)], %9021:tensor<[1, 8, 32, 128], UInt8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=571)]) + graph.CallGraphOp @model.layers.15 (%9048:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8238:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=18)], %8239:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=46)]) -> (%9100:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %9071:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=604)], %9073:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=606)]) + graph.CallGraphOp @model.layers.16 (%9100:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8240:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=19)], %8241:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=47)]) -> (%9152:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %9123:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=639)], %9125:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=641)]) + graph.CallGraphOp @model.layers.17 (%9152:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8242:tensor<[1, 
8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=20)], %8243:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=48)]) -> (%9204:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %9175:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=674)], %9177:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=676)]) + graph.CallGraphOp @model.layers.18 (%9204:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8244:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=21)], %8245:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=49)]) -> (%9256:tensor<[1, 32, 2048], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %9227:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=709)], %9229:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=711)]) + graph.CallGraphOp @model.layers.19 (%9256:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8246:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=22)], %8247:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=50)]) -> (%9308:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %9279:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=744)], %9281:tensor<[1, 8, 32, 128], UInt8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=746)]) + graph.CallGraphOp @model.layers.20 (%9308:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8248:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=23)], %8249:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=51)]) -> (%9360:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %9331:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=779)], %9333:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=781)]) + graph.CallGraphOp @model.layers.21 (%9360:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8250:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=24)], %8251:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=52)]) -> (%9412:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %9383:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=814)], %9385:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=816)]) + graph.CallGraphOp @model.layers.22 (%9412:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8252:tensor<[1, 
8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=25)], %8253:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=53)]) -> (%9464:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %9435:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=849)], %9437:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=851)]) + graph.CallGraphOp @model.layers.23 (%9464:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8254:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=26)], %8255:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=54)]) -> (%9516:tensor<[1, 32, 2048], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %9487:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=884)], %9489:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=886)]) + graph.CallGraphOp @model.layers.24 (%9516:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8256:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=27)], %8257:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=55)]) -> (%9568:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %9539:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=919)], %9541:tensor<[1, 8, 32, 128], UInt8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=921)]) + graph.CallGraphOp @model.layers.25 (%9568:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8258:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=28)], %8259:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=56)]) -> (%9620:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %9591:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=954)], %9593:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=956)]) + graph.CallGraphOp @model.layers.26 (%9620:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8260:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=29)], %8261:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=57)]) -> (%9672:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %9643:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=989)], %9645:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=991)]) + graph.CallGraphOp @model.layers.27 (%9672:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8262:tensor<[1, 
8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=30)], %8263:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=58)]) -> (%9724:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %9695:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1024)], %9697:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1026)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1046, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1047, solved=0))] (%9724:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%9725:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1046)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), 
uuid=1046, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1049, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1048, solved=0)), using_qnn:true] (%9725:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1046)]) -> (%9726:tensor<[1, 32, 151936], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1049)]) + cf.ReturnOp (%9726:tensor<[1, 32, 151936], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1049)], %8291:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=79)], %8343:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=114)], %8395:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=149)], %8447:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=184)], %8499:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=219)], %8551:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, 
quant_to_type: UInt8, scale_type: Float32), uuid=254)], %8603:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=289)], %8655:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=324)], %8707:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=359)], %8759:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=394)], %8811:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=429)], %8863:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=464)], %8915:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=499)], %8967:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=534)], %9019:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=569)], %9071:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=604)], %9123:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=639)], %9175:tensor<[1, 8, 128, 32], 
UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=674)], %9227:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=709)], %9279:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=744)], %9331:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=779)], %9383:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=814)], %9435:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=849)], %9487:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=884)], %9539:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=919)], %9591:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=954)], %9643:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=989)], %9695:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1024)], %8293:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, 
quant_to_type: UInt8, scale_type: Float32), uuid=81)], %8345:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=116)], %8397:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=151)], %8449:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=186)], %8501:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=221)], %8553:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=256)], %8605:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=291)], %8657:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=326)], %8709:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=361)], %8761:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=396)], %8813:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=431)], %8865:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=466)], %8917:tensor<[1, 8, 32, 128], 
UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=501)], %8969:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=536)], %9021:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=571)], %9073:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=606)], %9125:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=641)], %9177:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=676)], %9229:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=711)], %9281:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=746)], %9333:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=781)], %9385:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=816)], %9437:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=851)], %9489:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, 
quant_to_type: UInt8, scale_type: Float32), uuid=886)], %9541:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=921)], %9593:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=956)], %9645:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=991)], %9697:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1026)]) -> () } } graph.SubGraphOp @model.layers.0 [using_qnn:true, symbol:model.layers.0] { - (%8073:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=60)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8015:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=3)], %8016:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=31)]) -> (%8116:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=99)], 
%8089:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=78)], %8091:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=80)]) { - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=60), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=66), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=67))] (%8073:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=60)]) -> (%8076:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=66)]) - graph.CallGraphOp @model.layers.0.self_attn (%8076:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=66)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8015:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=3)], %8016:tensor<[1, 
8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=31)]) -> (%8108:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=90)], %8089:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=78)], %8091:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=80)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=90), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=60), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=90), )] (%8108:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=90)], %8073:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=60)]) -> (%8109:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=90)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=90), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=91), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=92))] (%8109:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=90)]) -> (%8110:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=91)]) - graph.CallGraphOp @model.layers.0.mlp (%8110:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=91)]) -> (%8115:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=99)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=99), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=90), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=99), )] (%8115:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=99)], %8109:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=90)]) -> (%8116:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=99)]) - cf.ReturnOp (%8116:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=99)], %8089:tensor<[1, 
8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=78)], %8091:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=80)]) -> () + (%8266:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8267:tensor<[1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8208:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=3)], %8209:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=31)]) -> (%8320:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8291:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=79)], %8293:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=81)]) { + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, 
quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=66, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=67, solved=0))] (%8266:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%8269:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=66)]) + graph.CallGraphOp @model.layers.0.self_attn (%8269:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=66)], %8267:tensor<[1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8208:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=3)], %8209:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=31)]) -> (%8311:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: 
Float32, zero_point_type: Int32), uuid=91)], %8291:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=79)], %8293:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=81)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=91, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%8266:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8311:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=91)]) -> (%8312:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=92, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: 
Int32), uuid=93, solved=0))] (%8312:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%8313:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=92)]) + graph.CallGraphOp @model.layers.0.mlp (%8313:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=92)]) -> (%8319:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=100)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=100, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%8312:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8319:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=100)]) -> (%8320:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + 
cf.ReturnOp (%8320:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8291:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=79)], %8293:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=81)]) -> () } } graph.SubGraphOp @model.layers.0.self_attn [using_qnn:true, symbol:model.layers.0.self_attn] { - (%8076:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=66)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8015:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=3)], %8016:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=31)]) -> (%8108:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=90)], %8089:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=78)], 
%8091:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=80)]) { - linalg.CPU.LinearOp (%8076:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=66)]) -> (%8077:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=72)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=66), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=69), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=68))] (%8076:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=66)]) -> (%8078:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=69)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=66), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=71), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=70))] (%8076:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=66)]) -> (%8079:tensor<[1, 32, 1024], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=71)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=72), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=72), )] (%8077:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=72)]) -> (%8077:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=72)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=72), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=72), )] (%8077:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=72)]) -> (%8080:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=72)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=69), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=69), )] (%8078:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=69)]) -> (%8078:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=69)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=69), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=69), )] (%8078:tensor<[1, 32, 8, 128], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=69)]) -> (%8081:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=69)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=71), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=71), )] (%8079:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=71)]) -> (%8079:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=71)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=71), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=71), )] (%8079:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=71)]) -> (%8082:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=71)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=72), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=73), weight_weight:QuantSpec(Raw(type: Int16PerTensor), uuid=74))] (%8080:tensor<[1, 16, 32, 128], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=72)]) -> (%8083:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=73)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=69), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=75), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=76))] (%8081:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=69)]) -> (%8084:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=75)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=73), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=73), )] (%8083:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=73)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%8085:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=73)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=75), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=75), )] (%8084:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=75)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%8086:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=75)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=75), outputs_0:QuantSpec(Raw(type: Float16), uuid=77), )] (%8086:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=75)]) -> 
(%8087:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=77)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=77), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=78), )] (%8087:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=77)]) -> (%8088:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=78)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=78), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=78), )] (%8088:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=78)]) -> (%8089:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=78)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=71), outputs_0:QuantSpec(Raw(type: Float16), uuid=79), )] (%8082:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=71)]) -> (%8090:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=79)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=79), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=80), )] 
(%8090:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=79)]) -> (%8091:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=80)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=3), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=78), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=3), )] (%8015:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=3)], %8089:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=78)]) -> (%8092:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=3)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=31), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=80), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=31), )] (%8016:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=31)], %8091:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=80)]) -> (%8093:tensor<[1, 8, 1024, 
128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=31)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=3), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=3), )] (%8092:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=3)]) -> (%8094:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=3)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=31), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=31), )] (%8093:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=31)]) -> (%8095:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=31)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=73), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=3), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=81), )] (%8085:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=73)], %8094:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=3)]) -> (%8096:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=81)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=81), inputs_1:QuantSpec(Raw(type: Float32), uuid=82), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=81), )] (%8096:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=81)], %8097:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=82), constant:[0.088388346]]) -> (%8098:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=81)]) - linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=81), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=83), )] (%8098:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=81)]) -> (%8099:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=83)]) - linalg.CPU.AddOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=83), inputs_1:QuantSpec(Raw(type: Int16), uuid=84), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=83), )] (%8099:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=83)], %8100:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=84), constant:[-20]]) -> (%8101:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=83)]) - linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=85), outputs_0:QuantSpec(Raw(type: UInt8), uuid=86), )] (%8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8102:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=85), constant:[0]]) -> (%8103:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=86)]) - linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=86), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=81), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=83), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=83), )] (%8103:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=86)], %8098:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, 
scale_type: Float32), uuid=81)], %8101:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=83)]) -> (%8104:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=83)]) - linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=83), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=87), )] (%8104:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=83)]) -> (%8105:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=87)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=87), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=31), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=88), )] (%8105:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=87)], %8095:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=31)]) -> (%8106:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, 
scale_type: Float32), uuid=88)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=88), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=88), )] (%8106:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=88)]) -> (%8107:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=88)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=88), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=88), )] (%8107:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=88)]) -> (%8107:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=88)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=88), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=90), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=89))] (%8107:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: 
Float32), uuid=88)]) -> (%8108:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=90)]) - cf.ReturnOp (%8108:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=90)], %8089:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=78)], %8091:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=80)]) -> () + (%8269:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=66)], %8267:tensor<[1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8208:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=3)], %8209:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=31)]) -> (%8311:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=91)], %8291:tensor<[1, 8, 128, 32], UInt8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=79)], %8293:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=81)]) { + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=66, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=69, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=68, solved=0))] (%8269:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=66)]) -> (%8270:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=69)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=66, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=71, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=70, solved=0))] (%8269:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=66)]) -> 
(%8271:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=71)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=66, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=73, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=72, solved=0))] (%8269:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=66)]) -> (%8272:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=73)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=69, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=69, solved=0), )] (%8270:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=69)]) -> (%8270:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=69)]) + linalg.CPU.TransposeOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=69, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=69, solved=0), )] (%8270:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=69)]) -> (%8273:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=69)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=71, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=71, solved=0), )] (%8271:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=71)]) -> (%8271:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=71)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=71, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=71, solved=0), )] (%8271:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 
65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=71)]) -> (%8274:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=71)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=73, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=73, solved=0), )] (%8272:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=73)]) -> (%8272:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=73)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=73, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=73, solved=0), )] (%8272:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=73)]) -> (%8275:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=73)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=69, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=74, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=0))] (%8273:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=69)]) -> (%8276:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=74)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=71, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=0))] (%8274:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=71)]) -> (%8277:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, 
solved=0), )] (%8268:tensor<[1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), )] (%8267:tensor<[1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=74, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=74, solved=0), )] (%8276:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=74)]) -> (%8276:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=74)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=74, 
solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=74, solved=0), )] (%8276:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=74)]) -> (%8276:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=74)]) + linalg.CPU.NegOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=74, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=74, solved=0), )] (%8276:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=74)]) -> (%8278:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=74)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=74, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=74, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=74, solved=0), )] (%8278:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, 
quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=74)], %8276:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=74)]) -> (%8279:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=74)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=74, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=74, solved=0), )] (%8279:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=74)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%8280:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=74)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=74, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: 
Float32, zero_point_type: Int32), uuid=74, solved=0), )] (%8276:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=74)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8281:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=74)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=74, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=74, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=74, solved=0), )] (%8281:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=74)], %8280:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=74)]) -> (%8282:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=74)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=0), 
outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=0), )] (%8277:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76)]) -> (%8277:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=0), )] (%8277:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76)]) -> (%8277:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76)]) + linalg.CPU.NegOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=0), )] (%8277:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76)]) -> (%8283:tensor<[1, 8, 32, 64], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=0), )] (%8283:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76)], %8277:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76)]) -> (%8284:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=0), )] (%8284:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76)], %8267:tensor<[1, 1, 32, 
128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%8285:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=0), )] (%8277:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8286:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=0), )] (%8286:tensor<[1, 8, 32, 128], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76)], %8285:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76)]) -> (%8287:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=78, solved=0), )] (%8287:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76)]) -> (%8288:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=78)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=78, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=79, solved=0), )] (%8288:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=78)]) -> (%8289:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=79)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=79, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=79, solved=0), )] (%8289:tensor<[1, 8, 32, 128], 
UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=79)]) -> (%8291:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=79)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=73, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=80, solved=0), )] (%8275:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=73)]) -> (%8292:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=80)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=80, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=81, solved=0), )] (%8292:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=80)]) -> (%8293:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=81)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=3, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=79, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=3, solved=0), )] (%8208:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, 
scale_type: Float32), uuid=3)], %8291:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=79)]) -> (%8295:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=3)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=31, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=81, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=31, solved=0), )] (%8209:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=31)], %8293:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=81)]) -> (%8296:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=31)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=3, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=3, solved=0), )] (%8295:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=3)]) -> (%8297:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), 
uuid=3)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=31, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=31, solved=0), )] (%8296:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=31)]) -> (%8298:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=31)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=74, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=3, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=82, solved=0), )] (%8282:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=74)], %8297:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=3)]) -> (%8299:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=82)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=82, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, 
quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=83, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=82, solved=0), )] (%8299:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=82)], %8300:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=83), constant:[0.088388346]]) -> (%8301:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=82)]) + linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=82, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=84, solved=0), )] (%8301:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=82)]) -> (%8302:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=84)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=84, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), 
uuid=85, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=84, solved=0), )] (%8302:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=84)], %8303:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=85), constant:[-20]]) -> (%8304:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=84)]) + linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2, solved=0), inputs_1:QuantSpec(Raw(type: UInt16), uuid=86, solved=0), outputs_0:QuantSpec(Raw(type: UInt8), uuid=87, solved=0), )] (%8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8305:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=86), constant:[0]]) -> (%8306:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=87)]) + linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=87, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=82, solved=0), inputs_2:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=84, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=84, solved=0), )] (%8306:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=87)], %8301:tensor<[1, 16, 32, 
1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=82)], %8304:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=84)]) -> (%8307:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=84)]) + linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=84, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=88, solved=0), )] (%8307:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=84)]) -> (%8308:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=88)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=88, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=31, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=89, solved=0), )] (%8308:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, 
scale_type: Float32, zero_point_type: Int32), uuid=88)], %8298:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=31)]) -> (%8309:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=89)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=89, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=89, solved=0), )] (%8309:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=89)]) -> (%8310:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=89)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=89, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=89, solved=0), )] (%8310:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=89)]) -> (%8310:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=89)]) + 
linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=89, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=91, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=90, solved=0))] (%8310:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=89)]) -> (%8311:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=91)]) + cf.ReturnOp (%8311:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=91)], %8291:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=79)], %8293:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=81)]) -> () } } graph.SubGraphOp @model.layers.0.mlp [using_qnn:true, symbol:model.layers.0.mlp] { - (%8110:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=91)]) -> (%8115:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=99)]) { - linalg.CPU.LinearOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=91), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=94), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=93))] (%8110:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=91)]) -> (%8111:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=94)]) - linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=94), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=95), )] (%8111:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=94)]) -> (%8112:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=95)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=91), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=97), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=96))] (%8110:tensor<[1, 32, 2048], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=91)]) -> (%8113:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=97)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=95), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=97), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=95), )] (%8112:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=95)], %8113:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=97)]) -> (%8114:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=95)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=95), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=99), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=98))] (%8114:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=95)]) -> (%8115:tensor<[1, 32, 2048], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=99)]) - cf.ReturnOp (%8115:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=99)]) -> () + (%8313:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=92)]) -> (%8319:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=100)]) { + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=92, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=95, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=94, solved=0))] (%8313:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=92)]) -> (%8314:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=95)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=92, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=97, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=96, solved=0))] (%8313:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=92)]) -> (%8315:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=97)]) + linalg.CPU.SigmoidOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=97, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=98, solved=0), )] (%8315:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=97)]) -> (%8316:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=98)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=97, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=98, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=97, solved=0), )] (%8315:tensor<[1, 32, 6144], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=97)], %8316:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=98)]) -> (%8317:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=97)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=97, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=95, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=97, solved=0), )] (%8317:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=97)], %8314:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=95)]) -> (%8318:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=97)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=97, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=100, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=99, solved=0))] (%8318:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=97)]) -> (%8319:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=100)]) + cf.ReturnOp (%8319:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=100)]) -> () } } graph.SubGraphOp @model.layers.1 [using_qnn:true, symbol:model.layers.1] { - (%8116:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=99)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8017:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=4)], %8018:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=32)]) -> (%8157:tensor<[1, 32, 2048], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=133)], %8130:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=112)], %8132:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=114)]) { - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=99), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=100), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=101))] (%8116:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=99)]) -> (%8117:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=100)]) - graph.CallGraphOp @model.layers.1.self_attn (%8117:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=100)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8017:tensor<[1, 8, 128, 992], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=4)], %8018:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=32)]) -> (%8149:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=124)], %8130:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=112)], %8132:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=114)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=124), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=99), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=124), )] (%8149:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=124)], %8116:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=99)]) -> (%8150:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=124)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=124), 
outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=125), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=126))] (%8150:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=124)]) -> (%8151:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=125)]) - graph.CallGraphOp @model.layers.1.mlp (%8151:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=125)]) -> (%8156:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=133)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=133), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=124), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=133), )] (%8156:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=133)], %8150:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=124)]) -> (%8157:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=133)]) - cf.ReturnOp 
(%8157:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=133)], %8130:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=112)], %8132:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=114)]) -> () + (%8320:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8210:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=4)], %8211:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=32)]) -> (%8372:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8343:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=114)], %8345:tensor<[1, 8, 32, 128], UInt8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=116)]) { + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=101, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=102, solved=0))] (%8320:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%8321:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=101)]) + graph.CallGraphOp @model.layers.1.self_attn (%8321:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=101)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8210:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=4)], %8211:tensor<[1, 8, 992, 128], UInt8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=32)]) -> (%8363:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=126)], %8343:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=114)], %8345:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=116)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=126, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%8320:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8363:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=126)]) -> (%8364:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, 
solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=127, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=128, solved=0))] (%8364:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%8365:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=127)]) + graph.CallGraphOp @model.layers.1.mlp (%8365:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=127)]) -> (%8371:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=135)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=135, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%8364:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8371:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, 
quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=135)]) -> (%8372:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + cf.ReturnOp (%8372:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8343:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=114)], %8345:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=116)]) -> () } } graph.SubGraphOp @model.layers.1.self_attn [using_qnn:true, symbol:model.layers.1.self_attn] { - (%8117:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=100)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8017:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=4)], %8018:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=32)]) -> (%8149:tensor<[1, 32, 2048], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=124)], %8130:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=112)], %8132:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=114)]) { - linalg.CPU.LinearOp (%8117:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=100)]) -> (%8118:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=106)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=100), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=103), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=102))] (%8117:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=100)]) -> (%8119:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=103)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=100), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=105), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, 
block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=104))] (%8117:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=100)]) -> (%8120:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=105)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=106), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=106), )] (%8118:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=106)]) -> (%8118:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=106)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=106), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=106), )] (%8118:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=106)]) -> (%8121:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=106)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=103), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=103), )] (%8119:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=103)]) -> (%8119:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=103)]) - linalg.CPU.TransposeOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=103), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=103), )] (%8119:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=103)]) -> (%8122:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=103)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=105), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=105), )] (%8120:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=105)]) -> (%8120:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=105)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=105), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=105), )] (%8120:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=105)]) -> (%8123:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=105)]) - 
linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=106), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=107), weight_weight:QuantSpec(Raw(type: Int16PerTensor), uuid=108))] (%8121:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=106)]) -> (%8124:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=107)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=103), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=109), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=110))] (%8122:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=103)]) -> (%8125:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=109)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=107), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=107), )] (%8124:tensor<[1, 16, 32, 128], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=107)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%8126:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=107)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=109), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=109), )] (%8125:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=109)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%8127:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=109)]) - linalg.CPU.CastTypeOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=109), outputs_0:QuantSpec(Raw(type: Float16), uuid=111), )] (%8127:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=109)]) -> (%8128:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=111)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=111), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=112), )] (%8128:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=111)]) -> (%8129:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=112)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=112), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=112), )] (%8129:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=112)]) -> (%8130:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=112)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=105), outputs_0:QuantSpec(Raw(type: Float16), uuid=113), )] (%8123:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 
32767, quant_to_type: Int16, scale_type: Float32), uuid=105)]) -> (%8131:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=113)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=113), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=114), )] (%8131:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=113)]) -> (%8132:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=114)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=4), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=112), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=4), )] (%8017:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=4)], %8130:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=112)]) -> (%8133:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=4)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=32), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=114), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: 
Float32), uuid=32), )] (%8018:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=32)], %8132:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=114)]) -> (%8134:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=32)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=4), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=4), )] (%8133:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=4)]) -> (%8135:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=4)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=32), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=32), )] (%8134:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=32)]) -> (%8136:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=32)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: 
Float32), uuid=107), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=4), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=115), )] (%8126:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=107)], %8135:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=4)]) -> (%8137:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=115)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=115), inputs_1:QuantSpec(Raw(type: Float32), uuid=116), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=115), )] (%8137:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=115)], %8138:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=116), constant:[0.088388346]]) -> (%8139:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=115)]) - linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=115), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=117), )] (%8139:tensor<[1, 16, 32, 1024], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=115)]) -> (%8140:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=117)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=117), inputs_1:QuantSpec(Raw(type: Int16), uuid=118), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=117), )] (%8140:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=117)], %8141:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=118), constant:[-20]]) -> (%8142:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=117)]) - linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=119), outputs_0:QuantSpec(Raw(type: UInt8), uuid=120), )] (%8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8143:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=119), constant:[0]]) -> (%8144:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=120)]) - linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=120), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=115), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=117), 
outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=117), )] (%8144:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=120)], %8139:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=115)], %8142:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=117)]) -> (%8145:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=117)]) - linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=117), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=121), )] (%8145:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=117)]) -> (%8146:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=121)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=121), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=32), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=122), )] (%8146:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=121)], %8136:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=32)]) -> (%8147:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=122)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=122), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=122), )] (%8147:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=122)]) -> (%8148:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=122)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=122), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=122), )] (%8148:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=122)]) -> (%8148:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=122)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=122), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 
32767, quant_to_type: Int16, scale_type: Float32), uuid=124), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=123))] (%8148:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=122)]) -> (%8149:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=124)]) - cf.ReturnOp (%8149:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=124)], %8130:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=112)], %8132:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=114)]) -> () + (%8321:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=101)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8210:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=4)], %8211:tensor<[1, 8, 992, 128], 
UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=32)]) -> (%8363:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=126)], %8343:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=114)], %8345:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=116)]) { + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=101, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=104, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=103, solved=0))] (%8321:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=101)]) -> (%8322:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=104)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=101, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=106, 
solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=105, solved=0))] (%8321:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=101)]) -> (%8323:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=106)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=101, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=108, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=107, solved=0))] (%8321:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=101)]) -> (%8324:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=108)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=104, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=104, solved=0), )] (%8322:tensor<[1, 32, 2048], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=104)]) -> (%8322:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=104)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=104, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=104, solved=0), )] (%8322:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=104)]) -> (%8325:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=104)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=106, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=106, solved=0), )] (%8323:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=106)]) -> (%8323:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=106)]) + linalg.CPU.TransposeOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=106, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=106, solved=0), )] (%8323:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=106)]) -> (%8326:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=106)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=108, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=108, solved=0), )] (%8324:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=108)]) -> (%8324:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=108)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=108, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=108, solved=0), )] (%8324:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, 
quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=108)]) -> (%8327:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=108)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=104, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=109, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=110, solved=0))] (%8325:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=104)]) -> (%8328:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=109)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=106, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=111, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=112, solved=0))] (%8326:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=106)]) -> (%8329:tensor<[1, 8, 32, 
128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=111)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), )] (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), )] (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=109, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=109, solved=0), )] (%8328:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 
65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=109)]) -> (%8328:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=109)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=109, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=109, solved=0), )] (%8328:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=109)]) -> (%8328:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=109)]) + linalg.CPU.NegOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=109, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=109, solved=0), )] (%8328:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=109)]) -> (%8330:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=109)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: 
Float32, zero_point_type: Int32), uuid=109, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=109, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=109, solved=0), )] (%8330:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=109)], %8328:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=109)]) -> (%8331:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=109)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=109, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=109, solved=0), )] (%8331:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=109)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%8332:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: 
UInt16, scale_type: Float32, zero_point_type: Int32), uuid=109)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=109, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=109, solved=0), )] (%8328:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=109)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8333:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=109)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=109, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=109, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=109, solved=0), )] (%8333:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=109)], %8332:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, 
quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=109)]) -> (%8334:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=109)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=111, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=111, solved=0), )] (%8329:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=111)]) -> (%8329:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=111)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=111, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=111, solved=0), )] (%8329:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=111)]) -> (%8329:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=111)]) + linalg.CPU.NegOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=111, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=111, solved=0), )] (%8329:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=111)]) -> (%8335:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=111)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=111, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=111, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=111, solved=0), )] (%8335:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=111)], %8329:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=111)]) -> (%8336:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=111)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=111, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, 
quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=111, solved=0), )] (%8336:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=111)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%8337:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=111)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=111, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=111, solved=0), )] (%8329:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=111)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8338:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=111)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, 
quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=111, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=111, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=111, solved=0), )] (%8338:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=111)], %8337:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=111)]) -> (%8339:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=111)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=111, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=113, solved=0), )] (%8339:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=111)]) -> (%8340:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=113)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=113, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=114, solved=0), )] (%8340:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=113)]) -> (%8341:tensor<[1, 8, 32, 128], 
UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=114)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=114, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=114, solved=0), )] (%8341:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=114)]) -> (%8343:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=114)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=108, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=115, solved=0), )] (%8327:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=108)]) -> (%8344:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=115)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=115, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=116, solved=0), )] (%8344:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=115)]) -> (%8345:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=116)]) + linalg.CPU.ConcatOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=4, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=114, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=4, solved=0), )] (%8210:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=4)], %8343:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=114)]) -> (%8347:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=4)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=32, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=116, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=32, solved=0), )] (%8211:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=32)], %8345:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=116)]) -> (%8348:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=32)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 
-128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=4, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=4, solved=0), )] (%8347:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=4)]) -> (%8349:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=4)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=32, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=32, solved=0), )] (%8348:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=32)]) -> (%8350:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=32)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=109, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=4, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=117, solved=0), )] (%8334:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=109)], %8349:tensor<[1, 16, 128, 1024], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=4)]) -> (%8351:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=117)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=117, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=118, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=117, solved=0), )] (%8351:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=117)], %8352:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=118), constant:[0.088388346]]) -> (%8353:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=117)]) + linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=117, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=119, solved=0), )] (%8353:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: 
Float32, zero_point_type: Int32), uuid=117)]) -> (%8354:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=119)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=119, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=120, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=119, solved=0), )] (%8354:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=119)], %8355:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=120), constant:[-20]]) -> (%8356:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=119)]) + linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2, solved=0), inputs_1:QuantSpec(Raw(type: UInt16), uuid=121, solved=0), outputs_0:QuantSpec(Raw(type: UInt8), uuid=122, solved=0), )] (%8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8357:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=121), constant:[0]]) -> (%8358:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=122)]) + linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=122, 
solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=117, solved=0), inputs_2:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=119, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=119, solved=0), )] (%8358:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=122)], %8353:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=117)], %8356:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=119)]) -> (%8359:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=119)]) + linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=119, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=123, solved=0), )] (%8359:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=119)]) -> (%8360:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=123)]) + linalg.CPU.MatMulOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=123, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=32, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=124, solved=0), )] (%8360:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=123)], %8350:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=32)]) -> (%8361:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=124)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=124, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=124, solved=0), )] (%8361:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=124)]) -> (%8362:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=124)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=124, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=124, solved=0), )] (%8362:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=124)]) -> (%8362:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=124)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=124, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=126, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=125, solved=0))] (%8362:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=124)]) -> (%8363:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=126)]) + cf.ReturnOp (%8363:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=126)], %8343:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=114)], %8345:tensor<[1, 8, 32, 128], UInt8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=116)]) -> () } } graph.SubGraphOp @model.layers.1.mlp [using_qnn:true, symbol:model.layers.1.mlp] { - (%8151:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=125)]) -> (%8156:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=133)]) { - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=125), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=128), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=127))] (%8151:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=125)]) -> (%8152:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=128)]) - linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=128), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=129), )] (%8152:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=128)]) -> (%8153:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 
-32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=129)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=125), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=131), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=130))] (%8151:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=125)]) -> (%8154:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=131)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=129), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=131), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=129), )] (%8153:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=129)], %8154:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=131)]) -> (%8155:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=129)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 
32767, quant_to_type: Int16, scale_type: Float32), uuid=129), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=133), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=132))] (%8155:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=129)]) -> (%8156:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=133)]) - cf.ReturnOp (%8156:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=133)]) -> () + (%8365:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=127)]) -> (%8371:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=135)]) { + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=127, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=130, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=129, solved=0))] (%8365:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, 
quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=127)]) -> (%8366:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=130)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=127, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=132, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=131, solved=0))] (%8365:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=127)]) -> (%8367:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=132)]) + linalg.CPU.SigmoidOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=132, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=133, solved=0), )] (%8367:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=132)]) -> (%8368:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), 
uuid=133)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=132, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=133, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=132, solved=0), )] (%8367:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=132)], %8368:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=133)]) -> (%8369:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=132)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=132, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=130, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=132, solved=0), )] (%8369:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=132)], %8366:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, 
scale_type: Float32, zero_point_type: Int32), uuid=130)]) -> (%8370:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=132)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=132, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=135, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=134, solved=0))] (%8370:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=132)]) -> (%8371:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=135)]) + cf.ReturnOp (%8371:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=135)]) -> () } } graph.SubGraphOp @model.layers.2 [using_qnn:true, symbol:model.layers.2] { - (%8157:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=133)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8019:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=5)], %8020:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=33)]) -> (%8198:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=167)], %8171:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=146)], %8173:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=148)]) { - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=133), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=134), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=135))] (%8157:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=133)]) -> (%8158:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=134)]) - graph.CallGraphOp @model.layers.2.self_attn (%8158:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=134)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8019:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=5)], %8020:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=33)]) -> (%8190:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=158)], %8171:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=146)], %8173:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=148)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=158), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=133), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=158), )] (%8190:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=158)], 
%8157:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=133)]) -> (%8191:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=158)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=158), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=159), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=160))] (%8191:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=158)]) -> (%8192:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=159)]) - graph.CallGraphOp @model.layers.2.mlp (%8192:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=159)]) -> (%8197:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=167)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=167), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=158), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=167), )] (%8197:tensor<[1, 
32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=167)], %8191:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=158)]) -> (%8198:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=167)]) - cf.ReturnOp (%8198:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=167)], %8171:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=146)], %8173:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=148)]) -> () + (%8372:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8212:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=5)], %8213:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, 
quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=33)]) -> (%8424:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8395:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=149)], %8397:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=151)]) { + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=136, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=137, solved=0))] (%8372:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%8373:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=136)]) + graph.CallGraphOp @model.layers.2.self_attn (%8373:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=136)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 
128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8212:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=5)], %8213:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=33)]) -> (%8415:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=161)], %8395:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=149)], %8397:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=151)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=161, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%8372:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8415:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: 
UInt16, scale_type: Float32, zero_point_type: Int32), uuid=161)]) -> (%8416:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=162, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=163, solved=0))] (%8416:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%8417:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=162)]) + graph.CallGraphOp @model.layers.2.mlp (%8417:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=162)]) -> (%8423:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=170)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=170, solved=0), 
outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%8416:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8423:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=170)]) -> (%8424:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + cf.ReturnOp (%8424:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8395:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=149)], %8397:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=151)]) -> () } } graph.SubGraphOp @model.layers.2.self_attn [using_qnn:true, symbol:model.layers.2.self_attn] { - (%8158:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=134)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, 
CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8019:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=5)], %8020:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=33)]) -> (%8190:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=158)], %8171:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=146)], %8173:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=148)]) { - linalg.CPU.LinearOp (%8158:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=134)]) -> (%8159:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=140)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=134), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=137), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=136))] (%8158:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=134)]) -> (%8160:tensor<[1, 32, 1024], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=137)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=134), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=139), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=138))] (%8158:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=134)]) -> (%8161:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=139)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=140), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=140), )] (%8159:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=140)]) -> (%8159:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=140)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=140), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=140), )] (%8159:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=140)]) -> (%8162:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=140)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=137), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=137), )] (%8160:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=137)]) -> (%8160:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=137)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=137), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=137), )] (%8160:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=137)]) -> (%8163:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=137)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=139), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=139), )] (%8161:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=139)]) -> (%8161:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=139)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=139), outputs_0:QuantSpec(SymPerTensor(quant_min: 
-32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=139), )] (%8161:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=139)]) -> (%8164:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=139)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=140), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=141), weight_weight:QuantSpec(Raw(type: Int16PerTensor), uuid=142))] (%8162:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=140)]) -> (%8165:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=141)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=137), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=143), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=144))] (%8163:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=137)]) -> (%8166:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=143)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: 
Float32), uuid=141), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=141), )] (%8165:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=141)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%8167:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=141)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=143), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=143), )] (%8166:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=143)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: 
Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%8168:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=143)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=143), outputs_0:QuantSpec(Raw(type: Float16), uuid=145), )] (%8168:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=143)]) -> (%8169:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=145)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=145), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=146), )] (%8169:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=145)]) -> (%8170:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=146)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=146), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=146), )] (%8170:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=146)]) -> (%8171:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, 
quant_to_type: Int8, scale_type: Float32), uuid=146)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=139), outputs_0:QuantSpec(Raw(type: Float16), uuid=147), )] (%8164:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=139)]) -> (%8172:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=147)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=147), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=148), )] (%8172:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=147)]) -> (%8173:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=148)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=5), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=146), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=5), )] (%8019:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=5)], %8171:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=146)]) -> (%8174:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: 
Float32), uuid=5)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=33), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=148), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=33), )] (%8020:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=33)], %8173:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=148)]) -> (%8175:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=33)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=5), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=5), )] (%8174:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=5)]) -> (%8176:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=5)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=33), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=33), )] (%8175:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 
-128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=33)]) -> (%8177:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=33)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=141), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=5), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=149), )] (%8167:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=141)], %8176:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=5)]) -> (%8178:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=149)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=149), inputs_1:QuantSpec(Raw(type: Float32), uuid=150), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=149), )] (%8178:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=149)], %8179:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=150), constant:[0.088388346]]) -> (%8180:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=149)]) - linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=149), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=151), )] (%8180:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=149)]) -> (%8181:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=151)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=151), inputs_1:QuantSpec(Raw(type: Int16), uuid=152), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=151), )] (%8181:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=151)], %8182:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=152), constant:[-20]]) -> (%8183:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=151)]) - linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=153), outputs_0:QuantSpec(Raw(type: UInt8), uuid=154), )] (%8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8184:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=153), constant:[0]]) -> (%8185:tensor<[1, 1, 32, 1024], UInt8, 
CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=154)]) - linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=154), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=149), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=151), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=151), )] (%8185:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=154)], %8180:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=149)], %8183:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=151)]) -> (%8186:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=151)]) - linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=151), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=155), )] (%8186:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=151)]) -> (%8187:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=155)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, 
scale_type: Float32), uuid=155), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=33), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=156), )] (%8187:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=155)], %8177:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=33)]) -> (%8188:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=156)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=156), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=156), )] (%8188:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=156)]) -> (%8189:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=156)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=156), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=156), )] (%8189:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=156)]) -> 
(%8189:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=156)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=156), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=158), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=157))] (%8189:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=156)]) -> (%8190:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=158)]) - cf.ReturnOp (%8190:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=158)], %8171:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=146)], %8173:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=148)]) -> () + (%8373:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=136)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8212:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=5)], %8213:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=33)]) -> (%8415:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=161)], %8395:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=149)], %8397:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=151)]) { + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=136, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=139, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=138, solved=0))] (%8373:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=136)]) -> (%8374:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, 
quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=139)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=136, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=141, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=140, solved=0))] (%8373:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=136)]) -> (%8375:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=141)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=136, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=143, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=142, solved=0))] (%8373:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=136)]) -> (%8376:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=143)]) + 
linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=139, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=139, solved=0), )] (%8374:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=139)]) -> (%8374:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=139)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=139, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=139, solved=0), )] (%8374:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=139)]) -> (%8377:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=139)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=141, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=141, solved=0), )] (%8375:tensor<[1, 32, 1024], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=141)]) -> (%8375:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=141)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=141, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=141, solved=0), )] (%8375:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=141)]) -> (%8378:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=141)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=143, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=143, solved=0), )] (%8376:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=143)]) -> (%8376:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=143)]) + linalg.CPU.TransposeOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=143, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=143, solved=0), )] (%8376:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=143)]) -> (%8379:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=143)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=139, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=144, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=145, solved=0))] (%8377:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=139)]) -> (%8380:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=144)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=141, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: 
Float32, zero_point_type: Int32), uuid=146, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=147, solved=0))] (%8378:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=141)]) -> (%8381:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=146)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), )] (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), )] (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) + linalg.CPU.SliceOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=144, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=144, solved=0), )] (%8380:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=144)]) -> (%8380:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=144)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=144, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=144, solved=0), )] (%8380:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=144)]) -> (%8380:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=144)]) + linalg.CPU.NegOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=144, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=144, solved=0), )] (%8380:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, 
quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=144)]) -> (%8382:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=144)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=144, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=144, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=144, solved=0), )] (%8382:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=144)], %8380:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=144)]) -> (%8383:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=144)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=144, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=144, solved=0), )] (%8383:tensor<[1, 16, 32, 128], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=144)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%8384:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=144)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=144, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=144, solved=0), )] (%8380:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=144)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8385:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=144)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=144, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=144, solved=0), 
outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=144, solved=0), )] (%8385:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=144)], %8384:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=144)]) -> (%8386:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=144)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=146, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=146, solved=0), )] (%8381:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=146)]) -> (%8381:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=146)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=146, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=146, solved=0), )] (%8381:tensor<[1, 8, 32, 128], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=146)]) -> (%8381:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=146)]) + linalg.CPU.NegOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=146, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=146, solved=0), )] (%8381:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=146)]) -> (%8387:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=146)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=146, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=146, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=146, solved=0), )] (%8387:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=146)], %8381:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, 
scale_type: Float32, zero_point_type: Int32), uuid=146)]) -> (%8388:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=146)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=146, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=146, solved=0), )] (%8388:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=146)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%8389:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=146)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=146, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=146, solved=0), )] (%8381:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=146)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8390:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=146)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=146, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=146, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=146, solved=0), )] (%8390:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=146)], %8389:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=146)]) -> (%8391:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=146)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=146, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=148, solved=0), )] (%8391:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=146)]) -> (%8392:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=148)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=148, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=149, solved=0), )] (%8392:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=148)]) -> (%8393:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=149)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=149, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=149, solved=0), )] (%8393:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=149)]) -> (%8395:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=149)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=143, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=150, solved=0), )] (%8379:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=143)]) -> (%8396:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=150)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), 
uuid=150, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=151, solved=0), )] (%8396:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=150)]) -> (%8397:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=151)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=5, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=149, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=5, solved=0), )] (%8212:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=5)], %8395:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=149)]) -> (%8399:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=5)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=33, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=151, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=33, solved=0), )] (%8213:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=33)], 
%8397:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=151)]) -> (%8400:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=33)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=5, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=5, solved=0), )] (%8399:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=5)]) -> (%8401:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=5)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=33, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=33, solved=0), )] (%8400:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=33)]) -> (%8402:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=33)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=144, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=5, 
solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=152, solved=0), )] (%8386:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=144)], %8401:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=5)]) -> (%8403:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=152)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=152, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=153, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=152, solved=0), )] (%8403:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=152)], %8404:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=153), constant:[0.088388346]]) -> (%8405:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=152)]) + linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, 
quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=152, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=154, solved=0), )] (%8405:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=152)]) -> (%8406:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=154)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=154, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=155, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=154, solved=0), )] (%8406:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=154)], %8407:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=155), constant:[-20]]) -> (%8408:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=154)]) + linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2, solved=0), inputs_1:QuantSpec(Raw(type: UInt16), uuid=156, solved=0), outputs_0:QuantSpec(Raw(type: 
UInt8), uuid=157, solved=0), )] (%8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8409:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=156), constant:[0]]) -> (%8410:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=157)]) + linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=157, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=152, solved=0), inputs_2:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=154, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=154, solved=0), )] (%8410:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=157)], %8405:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=152)], %8408:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=154)]) -> (%8411:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=154)]) + linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=154, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=158, solved=0), )] 
(%8411:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=154)]) -> (%8412:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=158)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=158, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=33, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=159, solved=0), )] (%8412:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=158)], %8402:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=33)]) -> (%8413:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=159)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=159, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=159, solved=0), )] (%8413:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: 
UInt16, scale_type: Float32, zero_point_type: Int32), uuid=159)]) -> (%8414:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=159)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=159, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=159, solved=0), )] (%8414:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=159)]) -> (%8414:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=159)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=159, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=161, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=160, solved=0))] (%8414:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=159)]) -> (%8415:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=161)]) + 
cf.ReturnOp (%8415:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=161)], %8395:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=149)], %8397:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=151)]) -> () } } graph.SubGraphOp @model.layers.2.mlp [using_qnn:true, symbol:model.layers.2.mlp] { - (%8192:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=159)]) -> (%8197:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=167)]) { - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=159), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=162), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=161))] (%8192:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=159)]) -> (%8193:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=162)]) - linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, 
scale_type: Float32), uuid=162), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=163), )] (%8193:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=162)]) -> (%8194:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=163)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=159), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=165), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=164))] (%8192:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=159)]) -> (%8195:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=165)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=163), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=165), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=163), )] (%8194:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=163)], %8195:tensor<[1, 32, 6144], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=165)]) -> (%8196:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=163)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=163), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=167), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=166))] (%8196:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=163)]) -> (%8197:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=167)]) - cf.ReturnOp (%8197:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=167)]) -> () + (%8417:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=162)]) -> (%8423:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=170)]) { + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=162, solved=0), 
outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=165, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=164, solved=0))] (%8417:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=162)]) -> (%8418:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=165)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=162, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=167, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=166, solved=0))] (%8417:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=162)]) -> (%8419:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=167)]) + linalg.CPU.SigmoidOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=167, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, 
scale_type: Float32, zero_point_type: Int32), uuid=168, solved=0), )] (%8419:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=167)]) -> (%8420:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=168)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=167, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=168, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=167, solved=0), )] (%8419:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=167)], %8420:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=168)]) -> (%8421:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=167)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=167, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=165, solved=0), 
outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=167, solved=0), )] (%8421:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=167)], %8418:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=165)]) -> (%8422:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=167)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=167, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=170, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=169, solved=0))] (%8422:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=167)]) -> (%8423:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=170)]) + cf.ReturnOp (%8423:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=170)]) -> () } } graph.SubGraphOp @model.layers.3 [using_qnn:true, 
symbol:model.layers.3] { - (%8198:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=167)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8021:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=6)], %8022:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=34)]) -> (%8239:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=201)], %8212:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=180)], %8214:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=182)]) { - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=167), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=168), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=169))] (%8198:tensor<[1, 32, 2048], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=167)]) -> (%8199:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=168)]) - graph.CallGraphOp @model.layers.3.self_attn (%8199:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=168)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8021:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=6)], %8022:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=34)]) -> (%8231:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=192)], %8212:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=180)], %8214:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=182)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=192), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=167), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=192), )] (%8231:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=192)], %8198:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=167)]) -> (%8232:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=192)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=192), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=193), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=194))] (%8232:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=192)]) -> (%8233:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=193)]) - graph.CallGraphOp @model.layers.3.mlp (%8233:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=193)]) -> (%8238:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: 
Int16, scale_type: Float32), uuid=201)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=201), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=192), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=201), )] (%8238:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=201)], %8232:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=192)]) -> (%8239:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=201)]) - cf.ReturnOp (%8239:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=201)], %8212:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=180)], %8214:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=182)]) -> () + (%8424:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8214:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=6)], %8215:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=34)]) -> (%8476:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8447:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=184)], %8449:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=186)]) { + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=171, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=172, solved=0))] (%8424:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%8425:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, 
scale_type: Float32, zero_point_type: Int32), uuid=171)]) + graph.CallGraphOp @model.layers.3.self_attn (%8425:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=171)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8214:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=6)], %8215:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=34)]) -> (%8467:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=196)], %8447:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=184)], %8449:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=186)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=196, 
solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%8424:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8467:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=196)]) -> (%8468:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=197, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=198, solved=0))] (%8468:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%8469:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=197)]) + graph.CallGraphOp @model.layers.3.mlp (%8469:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=197)]) -> (%8475:tensor<[1, 32, 2048], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=205)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=205, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%8468:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8475:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=205)]) -> (%8476:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + cf.ReturnOp (%8476:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8447:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=184)], %8449:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=186)]) -> () } } graph.SubGraphOp @model.layers.3.self_attn [using_qnn:true, symbol:model.layers.3.self_attn] { - (%8199:tensor<[1, 32, 2048], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=168)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8021:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=6)], %8022:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=34)]) -> (%8231:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=192)], %8212:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=180)], %8214:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=182)]) { - linalg.CPU.LinearOp (%8199:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=168)]) -> (%8200:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=174)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=168), outputs_0:QuantSpec(SymPerTensor(quant_min: 
-32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=171), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=170))] (%8199:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=168)]) -> (%8201:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=171)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=168), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=173), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=172))] (%8199:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=168)]) -> (%8202:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=173)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=174), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=174), )] (%8200:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=174)]) -> (%8200:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=174)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=174), outputs_0:QuantSpec(Raw(type: 
Int16PerTensor), uuid=174), )] (%8200:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=174)]) -> (%8203:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=174)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=171), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=171), )] (%8201:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=171)]) -> (%8201:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=171)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=171), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=171), )] (%8201:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=171)]) -> (%8204:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=171)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=173), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=173), )] (%8202:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 
-32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=173)]) -> (%8202:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=173)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=173), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=173), )] (%8202:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=173)]) -> (%8205:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=173)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=174), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=175), weight_weight:QuantSpec(Raw(type: Int16PerTensor), uuid=176))] (%8203:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=174)]) -> (%8206:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=175)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=171), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=177), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=178))] (%8204:tensor<[1, 8, 32, 128], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=171)]) -> (%8207:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=177)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=175), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=175), )] (%8206:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=175)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%8208:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=175)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=177), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, 
scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=177), )] (%8207:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=177)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%8209:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=177)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=177), outputs_0:QuantSpec(Raw(type: Float16), uuid=179), )] (%8209:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=177)]) -> (%8210:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=179)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=179), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=180), )] (%8210:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=179)]) -> (%8211:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=180)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 
-128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=180), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=180), )] (%8211:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=180)]) -> (%8212:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=180)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=173), outputs_0:QuantSpec(Raw(type: Float16), uuid=181), )] (%8205:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=173)]) -> (%8213:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=181)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=181), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=182), )] (%8213:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=181)]) -> (%8214:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=182)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=6), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=180), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=6), )] (%8021:tensor<[1, 
8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=6)], %8212:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=180)]) -> (%8215:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=6)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=34), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=182), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=34), )] (%8022:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=34)], %8214:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=182)]) -> (%8216:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=34)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=6), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=6), )] (%8215:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=6)]) -> (%8217:tensor<[1, 16, 128, 1024], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=6)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=34), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=34), )] (%8216:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=34)]) -> (%8218:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=34)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=175), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=6), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=183), )] (%8208:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=175)], %8217:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=6)]) -> (%8219:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=183)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=183), inputs_1:QuantSpec(Raw(type: Float32), uuid=184), 
outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=183), )] (%8219:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=183)], %8220:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=184), constant:[0.088388346]]) -> (%8221:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=183)]) - linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=183), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=185), )] (%8221:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=183)]) -> (%8222:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=185)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=185), inputs_1:QuantSpec(Raw(type: Int16), uuid=186), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=185), )] (%8222:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=185)], %8223:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=186), constant:[-20]]) -> (%8224:tensor<[1, 16, 32, 1], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=185)]) - linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=187), outputs_0:QuantSpec(Raw(type: UInt8), uuid=188), )] (%8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8225:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=187), constant:[0]]) -> (%8226:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=188)]) - linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=188), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=183), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=185), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=185), )] (%8226:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=188)], %8221:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=183)], %8224:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=185)]) -> (%8227:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=185)]) - linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=185), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=189), )] (%8227:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=185)]) -> (%8228:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=189)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=189), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=34), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=190), )] (%8228:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=189)], %8218:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=34)]) -> (%8229:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=190)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=190), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=190), )] (%8229:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=190)]) -> (%8230:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=190)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=190), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=190), )] (%8230:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=190)]) -> (%8230:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=190)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=190), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=192), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=191))] (%8230:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=190)]) -> (%8231:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=192)]) - cf.ReturnOp (%8231:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=192)], %8212:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=180)], %8214:tensor<[1, 8, 32, 128], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=182)]) -> () + (%8425:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=171)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8214:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=6)], %8215:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=34)]) -> (%8467:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=196)], %8447:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=184)], %8449:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=186)]) { + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=171, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: 
Float32, zero_point_type: Int32), uuid=174, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=173, solved=0))] (%8425:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=171)]) -> (%8426:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=174)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=171, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=176, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=175, solved=0))] (%8425:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=171)]) -> (%8427:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=176)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=171, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=178, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, 
quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=177, solved=0))] (%8425:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=171)]) -> (%8428:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=178)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=174, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=174, solved=0), )] (%8426:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=174)]) -> (%8426:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=174)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=174, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=174, solved=0), )] (%8426:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=174)]) -> (%8429:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, 
quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=174)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=176, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=176, solved=0), )] (%8427:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=176)]) -> (%8427:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=176)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=176, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=176, solved=0), )] (%8427:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=176)]) -> (%8430:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=176)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=178, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=178, 
solved=0), )] (%8428:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=178)]) -> (%8428:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=178)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=178, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=178, solved=0), )] (%8428:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=178)]) -> (%8431:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=178)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=174, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=179, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=180, solved=0))] (%8429:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=174)]) -> (%8432:tensor<[1, 16, 32, 128], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=179)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=176, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=181, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=182, solved=0))] (%8430:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=176)]) -> (%8433:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=181)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), )] (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 
65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), )] (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=179, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=179, solved=0), )] (%8432:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=179)]) -> (%8432:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=179)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=179, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=179, solved=0), )] (%8432:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=179)]) -> (%8432:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=179)]) + 
linalg.CPU.NegOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=179, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=179, solved=0), )] (%8432:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=179)]) -> (%8434:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=179)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=179, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=179, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=179, solved=0), )] (%8434:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=179)], %8432:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=179)]) -> (%8435:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=179)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 
0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=179, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=179, solved=0), )] (%8435:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=179)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%8436:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=179)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=179, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=179, solved=0), )] (%8432:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=179)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8437:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: 
UInt16, scale_type: Float32, zero_point_type: Int32), uuid=179)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=179, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=179, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=179, solved=0), )] (%8437:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=179)], %8436:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=179)]) -> (%8438:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=179)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=181, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=181, solved=0), )] (%8433:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=181)]) -> (%8433:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=181)]) + linalg.CPU.SliceOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=181, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=181, solved=0), )] (%8433:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=181)]) -> (%8433:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=181)]) + linalg.CPU.NegOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=181, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=181, solved=0), )] (%8433:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=181)]) -> (%8439:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=181)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=181, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=181, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=181, solved=0), )] (%8439:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=181)], %8433:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=181)]) -> (%8440:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=181)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=181, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=181, solved=0), )] (%8440:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=181)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%8441:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=181)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=181, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 
65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=181, solved=0), )] (%8433:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=181)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8442:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=181)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=181, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=181, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=181, solved=0), )] (%8442:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=181)], %8441:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=181)]) -> (%8443:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=181)]) + linalg.CPU.CastTypeOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=181, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=183, solved=0), )] (%8443:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=181)]) -> (%8444:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=183)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=183, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=184, solved=0), )] (%8444:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=183)]) -> (%8445:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=184)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=184, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=184, solved=0), )] (%8445:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=184)]) -> (%8447:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=184)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=178, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=185, 
solved=0), )] (%8431:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=178)]) -> (%8448:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=185)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=185, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=186, solved=0), )] (%8448:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=185)]) -> (%8449:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=186)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=6, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=184, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=6, solved=0), )] (%8214:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=6)], %8447:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=184)]) -> (%8451:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=6)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=34, solved=0), 
inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=186, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=34, solved=0), )] (%8215:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=34)], %8449:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=186)]) -> (%8452:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=34)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=6, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=6, solved=0), )] (%8451:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=6)]) -> (%8453:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=6)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=34, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=34, solved=0), )] (%8452:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=34)]) -> (%8454:tensor<[1, 16, 1024, 128], UInt8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=34)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=179, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=6, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=187, solved=0), )] (%8438:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=179)], %8453:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=6)]) -> (%8455:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=187)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=187, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=188, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=187, solved=0), )] (%8455:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=187)], %8456:tensor<[1], Float32, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=188), constant:[0.088388346]]) -> (%8457:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=187)]) + linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=187, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=189, solved=0), )] (%8457:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=187)]) -> (%8458:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=189)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=189, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=190, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=189, solved=0), )] (%8458:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=189)], %8459:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, 
quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=190), constant:[-20]]) -> (%8460:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=189)]) + linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2, solved=0), inputs_1:QuantSpec(Raw(type: UInt16), uuid=191, solved=0), outputs_0:QuantSpec(Raw(type: UInt8), uuid=192, solved=0), )] (%8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8461:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=191), constant:[0]]) -> (%8462:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=192)]) + linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=192, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=187, solved=0), inputs_2:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=189, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=189, solved=0), )] (%8462:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=192)], %8457:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=187)], %8460:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=189)]) -> (%8463:tensor<[1, 16, 32, 1024], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=189)]) + linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=189, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=193, solved=0), )] (%8463:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=189)]) -> (%8464:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=193)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=193, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=34, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=194, solved=0), )] (%8464:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=193)], %8454:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=34)]) -> (%8465:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: 
Int32), uuid=194)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=194, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=194, solved=0), )] (%8465:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=194)]) -> (%8466:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=194)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=194, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=194, solved=0), )] (%8466:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=194)]) -> (%8466:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=194)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=194, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=196, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, 
block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=195, solved=0))] (%8466:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=194)]) -> (%8467:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=196)]) + cf.ReturnOp (%8467:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=196)], %8447:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=184)], %8449:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=186)]) -> () } } graph.SubGraphOp @model.layers.3.mlp [using_qnn:true, symbol:model.layers.3.mlp] { - (%8233:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=193)]) -> (%8238:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=201)]) { - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=193), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=196), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: 
Float32), uuid=195))] (%8233:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=193)]) -> (%8234:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=196)]) - linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=196), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=197), )] (%8234:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=196)]) -> (%8235:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=197)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=193), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=199), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=198))] (%8233:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=193)]) -> (%8236:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=199)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=197), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=199), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=197), )] (%8235:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=197)], %8236:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=199)]) -> (%8237:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=197)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=197), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=201), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=200))] (%8237:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=197)]) -> (%8238:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=201)]) - cf.ReturnOp (%8238:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=201)]) -> () + (%8469:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 
65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=197)]) -> (%8475:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=205)]) { + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=197, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=200, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=199, solved=0))] (%8469:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=197)]) -> (%8470:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=200)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=197, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=202, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=201, solved=0))] (%8469:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=197)]) -> 
(%8471:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=202)]) + linalg.CPU.SigmoidOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=202, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=203, solved=0), )] (%8471:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=202)]) -> (%8472:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=203)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=202, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=203, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=202, solved=0), )] (%8471:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=202)], %8472:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=203)]) -> (%8473:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, 
quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=202)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=202, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=200, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=202, solved=0), )] (%8473:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=202)], %8470:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=200)]) -> (%8474:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=202)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=202, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=205, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=204, solved=0))] (%8474:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=202)]) -> (%8475:tensor<[1, 32, 2048], 
UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=205)]) + cf.ReturnOp (%8475:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=205)]) -> () } } graph.SubGraphOp @model.layers.4 [using_qnn:true, symbol:model.layers.4] { - (%8239:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=201)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8023:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=7)], %8024:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=35)]) -> (%8280:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=235)], %8253:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=214)], %8255:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=216)]) { - 
linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=201), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=202), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=203))] (%8239:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=201)]) -> (%8240:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=202)]) - graph.CallGraphOp @model.layers.4.self_attn (%8240:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=202)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8023:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=7)], %8024:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=35)]) -> (%8272:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=226)], 
%8253:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=214)], %8255:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=216)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=226), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=201), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=226), )] (%8272:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=226)], %8239:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=201)]) -> (%8273:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=226)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=226), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=227), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=228))] (%8273:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=226)]) -> (%8274:tensor<[1, 32, 2048], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=227)]) - graph.CallGraphOp @model.layers.4.mlp (%8274:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=227)]) -> (%8279:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=235)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=235), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=226), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=235), )] (%8279:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=235)], %8273:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=226)]) -> (%8280:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=235)]) - cf.ReturnOp (%8280:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=235)], %8253:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=214)], %8255:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: 
Int8, scale_type: Float32), uuid=216)]) -> () + (%8476:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8216:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=7)], %8217:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=35)]) -> (%8528:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8499:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=219)], %8501:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=221)]) { + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=206, solved=0), 
weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=207, solved=0))] (%8476:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%8477:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=206)]) + graph.CallGraphOp @model.layers.4.self_attn (%8477:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=206)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8216:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=7)], %8217:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=35)]) -> (%8519:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=231)], %8499:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=219)], %8501:tensor<[1, 
8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=221)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=231, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%8476:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8519:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=231)]) -> (%8520:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=232, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=233, solved=0))] (%8520:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) 
-> (%8521:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=232)]) + graph.CallGraphOp @model.layers.4.mlp (%8521:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=232)]) -> (%8527:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=240)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=240, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%8520:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8527:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=240)]) -> (%8528:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + cf.ReturnOp (%8528:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8499:tensor<[1, 
8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=219)], %8501:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=221)]) -> () } } graph.SubGraphOp @model.layers.4.self_attn [using_qnn:true, symbol:model.layers.4.self_attn] { - (%8240:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=202)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8023:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=7)], %8024:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=35)]) -> (%8272:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=226)], %8253:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=214)], %8255:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=216)]) { - linalg.CPU.LinearOp (%8240:tensor<[1, 32, 2048], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=202)]) -> (%8241:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=208)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=202), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=205), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=204))] (%8240:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=202)]) -> (%8242:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=205)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=202), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=207), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=206))] (%8240:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=202)]) -> (%8243:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=207)]) - linalg.CPU.ViewOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=208), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=208), )] (%8241:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=208)]) -> (%8241:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=208)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=208), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=208), )] (%8241:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=208)]) -> (%8244:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=208)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=205), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=205), )] (%8242:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=205)]) -> (%8242:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=205)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=205), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=205), )] (%8242:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=205)]) -> (%8245:tensor<[1, 8, 32, 128], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=205)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=207), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=207), )] (%8243:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=207)]) -> (%8243:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=207)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=207), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=207), )] (%8243:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=207)]) -> (%8246:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=207)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=208), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=209), weight_weight:QuantSpec(Raw(type: Int16PerTensor), uuid=210))] (%8244:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=208)]) -> (%8247:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=209)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=205), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=211), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=212))] (%8245:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=205)]) -> (%8248:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=211)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=209), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=209), )] (%8247:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=209)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%8249:tensor<[1, 16, 32, 128], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=209)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=211), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=211), )] (%8248:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=211)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%8250:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=211)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=211), outputs_0:QuantSpec(Raw(type: Float16), uuid=213), )] (%8250:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=211)]) -> (%8251:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=213)]) - linalg.CPU.CastTypeOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=213), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=214), )] (%8251:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=213)]) -> (%8252:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=214)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=214), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=214), )] (%8252:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=214)]) -> (%8253:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=214)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=207), outputs_0:QuantSpec(Raw(type: Float16), uuid=215), )] (%8246:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=207)]) -> (%8254:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=215)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=215), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=216), )] (%8254:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=215)]) -> (%8255:tensor<[1, 8, 
32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=216)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=7), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=214), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=7), )] (%8023:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=7)], %8253:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=214)]) -> (%8256:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=7)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=35), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=216), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=35), )] (%8024:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=35)], %8255:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=216)]) -> (%8257:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, 
scale_type: Float32), uuid=35)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=7), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=7), )] (%8256:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=7)]) -> (%8258:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=7)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=35), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=35), )] (%8257:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=35)]) -> (%8259:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=35)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=209), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=7), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=217), )] (%8249:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=209)], %8258:tensor<[1, 16, 128, 1024], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=7)]) -> (%8260:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=217)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=217), inputs_1:QuantSpec(Raw(type: Float32), uuid=218), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=217), )] (%8260:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=217)], %8261:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=218), constant:[0.088388346]]) -> (%8262:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=217)]) - linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=217), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=219), )] (%8262:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=217)]) -> (%8263:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=219)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=219), 
inputs_1:QuantSpec(Raw(type: Int16), uuid=220), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=219), )] (%8263:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=219)], %8264:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=220), constant:[-20]]) -> (%8265:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=219)]) - linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=221), outputs_0:QuantSpec(Raw(type: UInt8), uuid=222), )] (%8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8266:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=221), constant:[0]]) -> (%8267:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=222)]) - linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=222), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=217), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=219), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=219), )] (%8267:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=222)], %8262:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=217)], %8265:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=219)]) -> (%8268:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=219)]) - linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=219), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=223), )] (%8268:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=219)]) -> (%8269:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=223)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=223), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=35), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=224), )] (%8269:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=223)], %8259:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=35)]) -> (%8270:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=224)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 
-32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=224), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=224), )] (%8270:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=224)]) -> (%8271:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=224)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=224), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=224), )] (%8271:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=224)]) -> (%8271:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=224)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=224), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=226), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=225))] (%8271:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=224)]) -> (%8272:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 
-32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=226)]) - cf.ReturnOp (%8272:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=226)], %8253:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=214)], %8255:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=216)]) -> () + (%8477:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=206)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8216:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=7)], %8217:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=35)]) -> (%8519:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=231)], %8499:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), 
uuid=219)], %8501:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=221)]) { + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=206, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=209, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=208, solved=0))] (%8477:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=206)]) -> (%8478:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=209)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=206, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=211, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=210, solved=0))] (%8477:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=206)]) -> (%8479:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 
65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=211)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=206, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=213, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=212, solved=0))] (%8477:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=206)]) -> (%8480:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=213)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=209, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=209, solved=0), )] (%8478:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=209)]) -> (%8478:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=209)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), 
uuid=209, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=209, solved=0), )] (%8478:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=209)]) -> (%8481:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=209)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=211, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=211, solved=0), )] (%8479:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=211)]) -> (%8479:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=211)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=211, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=211, solved=0), )] (%8479:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=211)]) -> (%8482:tensor<[1, 8, 32, 128], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=211)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=213, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=213, solved=0), )] (%8480:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=213)]) -> (%8480:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=213)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=213, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=213, solved=0), )] (%8480:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=213)]) -> (%8483:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=213)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=209, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: 
UInt16, scale_type: Float32, zero_point_type: Int32), uuid=214, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=215, solved=0))] (%8481:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=209)]) -> (%8484:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=214)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=211, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=216, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=217, solved=0))] (%8482:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=211)]) -> (%8485:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=216)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), )] (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, 
quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), )] (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=214, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=214, solved=0), )] (%8484:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=214)]) -> (%8484:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=214)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=214, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, 
scale_type: Float32, zero_point_type: Int32), uuid=214, solved=0), )] (%8484:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=214)]) -> (%8484:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=214)]) + linalg.CPU.NegOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=214, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=214, solved=0), )] (%8484:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=214)]) -> (%8486:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=214)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=214, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=214, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=214, solved=0), )] (%8486:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=214)], %8484:tensor<[1, 16, 
32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=214)]) -> (%8487:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=214)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=214, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=214, solved=0), )] (%8487:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=214)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%8488:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=214)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=214, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=214, solved=0), )] (%8484:tensor<[1, 16, 32, 128], 
UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=214)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8489:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=214)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=214, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=214, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=214, solved=0), )] (%8489:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=214)], %8488:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=214)]) -> (%8490:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=214)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=216, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: 
Float32, zero_point_type: Int32), uuid=216, solved=0), )] (%8485:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=216)]) -> (%8485:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=216)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=216, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=216, solved=0), )] (%8485:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=216)]) -> (%8485:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=216)]) + linalg.CPU.NegOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=216, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=216, solved=0), )] (%8485:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=216)]) -> (%8491:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), 
uuid=216)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=216, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=216, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=216, solved=0), )] (%8491:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=216)], %8485:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=216)]) -> (%8492:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=216)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=216, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=216, solved=0), )] (%8492:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=216)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: 
Float32), uuid=63)]) -> (%8493:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=216)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=216, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=216, solved=0), )] (%8485:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=216)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8494:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=216)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=216, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=216, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=216, solved=0), )] (%8494:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=216)], %8493:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=216)]) -> (%8495:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=216)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=216, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=218, solved=0), )] (%8495:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=216)]) -> (%8496:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=218)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=218, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=219, solved=0), )] (%8496:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=218)]) -> (%8497:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=219)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=219, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=219, solved=0), )] (%8497:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, 
scale_type: Float32), uuid=219)]) -> (%8499:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=219)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=213, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=220, solved=0), )] (%8483:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=213)]) -> (%8500:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=220)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=220, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=221, solved=0), )] (%8500:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=220)]) -> (%8501:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=221)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=7, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=219, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=7, solved=0), )] (%8216:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=7)], %8499:tensor<[1, 8, 128, 32], UInt8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=219)]) -> (%8503:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=7)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=35, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=221, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=35, solved=0), )] (%8217:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=35)], %8501:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=221)]) -> (%8504:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=35)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=7, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=7, solved=0), )] (%8503:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=7)]) -> (%8505:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=7)]) + linalg.CPU.RepeatOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=35, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=35, solved=0), )] (%8504:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=35)]) -> (%8506:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=35)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=214, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=7, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=222, solved=0), )] (%8490:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=214)], %8505:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=7)]) -> (%8507:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=222)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=222, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, 
quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=223, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=222, solved=0), )] (%8507:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=222)], %8508:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=223), constant:[0.088388346]]) -> (%8509:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=222)]) + linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=222, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=224, solved=0), )] (%8509:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=222)]) -> (%8510:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=224)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=224, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=225, 
solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=224, solved=0), )] (%8510:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=224)], %8511:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=225), constant:[-20]]) -> (%8512:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=224)]) + linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2, solved=0), inputs_1:QuantSpec(Raw(type: UInt16), uuid=226, solved=0), outputs_0:QuantSpec(Raw(type: UInt8), uuid=227, solved=0), )] (%8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8513:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=226), constant:[0]]) -> (%8514:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=227)]) + linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=227, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=222, solved=0), inputs_2:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=224, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=224, solved=0), )] (%8514:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=227)], %8509:tensor<[1, 16, 32, 
1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=222)], %8512:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=224)]) -> (%8515:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=224)]) + linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=224, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=228, solved=0), )] (%8515:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=224)]) -> (%8516:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=228)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=228, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=35, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=229, solved=0), )] (%8516:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: 
UInt16, scale_type: Float32, zero_point_type: Int32), uuid=228)], %8506:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=35)]) -> (%8517:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=229)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=229, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=229, solved=0), )] (%8517:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=229)]) -> (%8518:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=229)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=229, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=229, solved=0), )] (%8518:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=229)]) -> (%8518:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=229)]) 
+ linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=229, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=231, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=230, solved=0))] (%8518:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=229)]) -> (%8519:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=231)]) + cf.ReturnOp (%8519:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=231)], %8499:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=219)], %8501:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=221)]) -> () } } graph.SubGraphOp @model.layers.4.mlp [using_qnn:true, symbol:model.layers.4.mlp] { - (%8274:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=227)]) -> (%8279:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=235)]) { - linalg.CPU.LinearOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=227), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=230), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=229))] (%8274:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=227)]) -> (%8275:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=230)]) - linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=230), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=231), )] (%8275:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=230)]) -> (%8276:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=231)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=227), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=233), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=232))] (%8274:tensor<[1, 32, 2048], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=227)]) -> (%8277:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=233)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=231), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=233), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=231), )] (%8276:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=231)], %8277:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=233)]) -> (%8278:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=231)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=231), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=235), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=234))] (%8278:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=231)]) -> (%8279:tensor<[1, 32, 2048], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=235)]) - cf.ReturnOp (%8279:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=235)]) -> () + (%8521:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=232)]) -> (%8527:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=240)]) { + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=232, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=235, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=234, solved=0))] (%8521:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=232)]) -> (%8522:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=235)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=232, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: 
Float32, zero_point_type: Int32), uuid=237, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=236, solved=0))] (%8521:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=232)]) -> (%8523:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=237)]) + linalg.CPU.SigmoidOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=237, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=238, solved=0), )] (%8523:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=237)]) -> (%8524:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=238)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=237, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=238, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=237, solved=0), )] (%8523:tensor<[1, 32, 6144], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=237)], %8524:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=238)]) -> (%8525:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=237)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=237, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=235, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=237, solved=0), )] (%8525:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=237)], %8522:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=235)]) -> (%8526:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=237)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=237, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=240, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=239, solved=0))] (%8526:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=237)]) -> (%8527:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=240)]) + cf.ReturnOp (%8527:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=240)]) -> () } } graph.SubGraphOp @model.layers.5 [using_qnn:true, symbol:model.layers.5] { - (%8280:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=235)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8025:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=8)], %8026:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=36)]) -> (%8321:tensor<[1, 32, 2048], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=269)], %8294:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=248)], %8296:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=250)]) { - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=235), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=236), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=237))] (%8280:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=235)]) -> (%8281:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=236)]) - graph.CallGraphOp @model.layers.5.self_attn (%8281:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=236)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8025:tensor<[1, 8, 128, 992], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=8)], %8026:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=36)]) -> (%8313:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=260)], %8294:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=248)], %8296:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=250)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=260), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=235), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=260), )] (%8313:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=260)], %8280:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=235)]) -> (%8314:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=260)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=260), 
outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=261), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=262))] (%8314:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=260)]) -> (%8315:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=261)]) - graph.CallGraphOp @model.layers.5.mlp (%8315:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=261)]) -> (%8320:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=269)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=269), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=260), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=269), )] (%8320:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=269)], %8314:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=260)]) -> (%8321:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=269)]) - cf.ReturnOp 
(%8321:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=269)], %8294:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=248)], %8296:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=250)]) -> () + (%8528:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8218:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=8)], %8219:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=36)]) -> (%8580:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8551:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=254)], %8553:tensor<[1, 8, 32, 128], UInt8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=256)]) { + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=241, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=242, solved=0))] (%8528:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%8529:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=241)]) + graph.CallGraphOp @model.layers.5.self_attn (%8529:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=241)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8218:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=8)], %8219:tensor<[1, 8, 992, 128], UInt8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=36)]) -> (%8571:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=266)], %8551:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=254)], %8553:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=256)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=266, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%8528:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8571:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=266)]) -> (%8572:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, 
solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=267, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=268, solved=0))] (%8572:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%8573:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=267)]) + graph.CallGraphOp @model.layers.5.mlp (%8573:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=267)]) -> (%8579:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=275)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=275, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%8572:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8579:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, 
quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=275)]) -> (%8580:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + cf.ReturnOp (%8580:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8551:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=254)], %8553:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=256)]) -> () } } graph.SubGraphOp @model.layers.5.self_attn [using_qnn:true, symbol:model.layers.5.self_attn] { - (%8281:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=236)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8025:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=8)], %8026:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=36)]) -> (%8313:tensor<[1, 32, 2048], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=260)], %8294:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=248)], %8296:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=250)]) { - linalg.CPU.LinearOp (%8281:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=236)]) -> (%8282:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=242)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=236), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=239), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=238))] (%8281:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=236)]) -> (%8283:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=239)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=236), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=241), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, 
block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=240))] (%8281:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=236)]) -> (%8284:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=241)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=242), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=242), )] (%8282:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=242)]) -> (%8282:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=242)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=242), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=242), )] (%8282:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=242)]) -> (%8285:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=242)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=239), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=239), )] (%8283:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=239)]) -> (%8283:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=239)]) - linalg.CPU.TransposeOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=239), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=239), )] (%8283:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=239)]) -> (%8286:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=239)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=241), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=241), )] (%8284:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=241)]) -> (%8284:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=241)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=241), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=241), )] (%8284:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=241)]) -> (%8287:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=241)]) - 
linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=242), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=243), weight_weight:QuantSpec(Raw(type: Int16PerTensor), uuid=244))] (%8285:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=242)]) -> (%8288:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=243)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=239), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=245), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=246))] (%8286:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=239)]) -> (%8289:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=245)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=243), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=243), )] (%8288:tensor<[1, 16, 32, 128], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=243)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%8290:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=243)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=245), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=245), )] (%8289:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=245)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%8291:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=245)]) - linalg.CPU.CastTypeOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=245), outputs_0:QuantSpec(Raw(type: Float16), uuid=247), )] (%8291:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=245)]) -> (%8292:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=247)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=247), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=248), )] (%8292:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=247)]) -> (%8293:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=248)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=248), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=248), )] (%8293:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=248)]) -> (%8294:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=248)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=241), outputs_0:QuantSpec(Raw(type: Float16), uuid=249), )] (%8287:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 
32767, quant_to_type: Int16, scale_type: Float32), uuid=241)]) -> (%8295:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=249)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=249), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=250), )] (%8295:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=249)]) -> (%8296:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=250)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=8), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=248), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=8), )] (%8025:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=8)], %8294:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=248)]) -> (%8297:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=8)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=36), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=250), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: 
Float32), uuid=36), )] (%8026:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=36)], %8296:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=250)]) -> (%8298:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=36)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=8), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=8), )] (%8297:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=8)]) -> (%8299:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=8)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=36), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=36), )] (%8298:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=36)]) -> (%8300:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=36)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: 
Float32), uuid=243), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=8), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=251), )] (%8290:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=243)], %8299:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=8)]) -> (%8301:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=251)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=251), inputs_1:QuantSpec(Raw(type: Float32), uuid=252), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=251), )] (%8301:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=251)], %8302:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=252), constant:[0.088388346]]) -> (%8303:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=251)]) - linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=251), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=253), )] (%8303:tensor<[1, 16, 32, 1024], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=251)]) -> (%8304:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=253)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=253), inputs_1:QuantSpec(Raw(type: Int16), uuid=254), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=253), )] (%8304:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=253)], %8305:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=254), constant:[-20]]) -> (%8306:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=253)]) - linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=255), outputs_0:QuantSpec(Raw(type: UInt8), uuid=256), )] (%8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8307:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=255), constant:[0]]) -> (%8308:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=256)]) - linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=256), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=251), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=253), 
outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=253), )] (%8308:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=256)], %8303:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=251)], %8306:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=253)]) -> (%8309:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=253)]) - linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=253), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=257), )] (%8309:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=253)]) -> (%8310:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=257)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=257), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=36), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=258), )] (%8310:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=257)], %8300:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=36)]) -> (%8311:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=258)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=258), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=258), )] (%8311:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=258)]) -> (%8312:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=258)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=258), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=258), )] (%8312:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=258)]) -> (%8312:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=258)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=258), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 
32767, quant_to_type: Int16, scale_type: Float32), uuid=260), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=259))] (%8312:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=258)]) -> (%8313:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=260)]) - cf.ReturnOp (%8313:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=260)], %8294:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=248)], %8296:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=250)]) -> () + (%8529:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=241)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8218:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=8)], %8219:tensor<[1, 8, 992, 128], 
UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=36)]) -> (%8571:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=266)], %8551:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=254)], %8553:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=256)]) { + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=241, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=244, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=243, solved=0))] (%8529:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=241)]) -> (%8530:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=244)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=241, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=246, 
solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=245, solved=0))] (%8529:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=241)]) -> (%8531:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=246)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=241, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=248, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=247, solved=0))] (%8529:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=241)]) -> (%8532:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=248)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=244, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=244, solved=0), )] (%8530:tensor<[1, 32, 2048], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=244)]) -> (%8530:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=244)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=244, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=244, solved=0), )] (%8530:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=244)]) -> (%8533:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=244)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=246, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=246, solved=0), )] (%8531:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=246)]) -> (%8531:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=246)]) + linalg.CPU.TransposeOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=246, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=246, solved=0), )] (%8531:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=246)]) -> (%8534:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=246)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=248, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=248, solved=0), )] (%8532:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=248)]) -> (%8532:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=248)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=248, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=248, solved=0), )] (%8532:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, 
quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=248)]) -> (%8535:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=248)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=244, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=249, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=250, solved=0))] (%8533:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=244)]) -> (%8536:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=249)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=246, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=251, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=252, solved=0))] (%8534:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=246)]) -> (%8537:tensor<[1, 8, 32, 
128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=251)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), )] (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), )] (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=249, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=249, solved=0), )] (%8536:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 
65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=249)]) -> (%8536:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=249)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=249, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=249, solved=0), )] (%8536:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=249)]) -> (%8536:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=249)]) + linalg.CPU.NegOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=249, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=249, solved=0), )] (%8536:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=249)]) -> (%8538:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=249)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: 
Float32, zero_point_type: Int32), uuid=249, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=249, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=249, solved=0), )] (%8538:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=249)], %8536:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=249)]) -> (%8539:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=249)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=249, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=249, solved=0), )] (%8539:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=249)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%8540:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: 
UInt16, scale_type: Float32, zero_point_type: Int32), uuid=249)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=249, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=249, solved=0), )] (%8536:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=249)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8541:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=249)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=249, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=249, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=249, solved=0), )] (%8541:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=249)], %8540:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, 
quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=249)]) -> (%8542:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=249)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=251, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=251, solved=0), )] (%8537:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=251)]) -> (%8537:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=251)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=251, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=251, solved=0), )] (%8537:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=251)]) -> (%8537:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=251)]) + linalg.CPU.NegOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=251, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=251, solved=0), )] (%8537:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=251)]) -> (%8543:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=251)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=251, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=251, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=251, solved=0), )] (%8543:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=251)], %8537:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=251)]) -> (%8544:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=251)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=251, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, 
quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=251, solved=0), )] (%8544:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=251)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%8545:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=251)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=251, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=251, solved=0), )] (%8537:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=251)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8546:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=251)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, 
quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=251, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=251, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=251, solved=0), )] (%8546:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=251)], %8545:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=251)]) -> (%8547:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=251)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=251, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=253, solved=0), )] (%8547:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=251)]) -> (%8548:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=253)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=253, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=254, solved=0), )] (%8548:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=253)]) -> (%8549:tensor<[1, 8, 32, 128], 
UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=254)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=254, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=254, solved=0), )] (%8549:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=254)]) -> (%8551:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=254)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=248, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=255, solved=0), )] (%8535:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=248)]) -> (%8552:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=255)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=255, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=256, solved=0), )] (%8552:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=255)]) -> (%8553:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=256)]) + linalg.CPU.ConcatOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=8, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=254, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=8, solved=0), )] (%8218:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=8)], %8551:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=254)]) -> (%8555:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=8)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=36, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=256, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=36, solved=0), )] (%8219:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=36)], %8553:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=256)]) -> (%8556:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=36)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 
-128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=8, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=8, solved=0), )] (%8555:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=8)]) -> (%8557:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=8)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=36, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=36, solved=0), )] (%8556:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=36)]) -> (%8558:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=36)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=249, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=8, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=257, solved=0), )] (%8542:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=249)], %8557:tensor<[1, 16, 128, 1024], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=8)]) -> (%8559:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=257)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=257, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=258, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=257, solved=0), )] (%8559:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=257)], %8560:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=258), constant:[0.088388346]]) -> (%8561:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=257)]) + linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=257, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=259, solved=0), )] (%8561:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: 
Float32, zero_point_type: Int32), uuid=257)]) -> (%8562:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=259)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=259, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=260, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=259, solved=0), )] (%8562:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=259)], %8563:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=260), constant:[-20]]) -> (%8564:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=259)]) + linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2, solved=0), inputs_1:QuantSpec(Raw(type: UInt16), uuid=261, solved=0), outputs_0:QuantSpec(Raw(type: UInt8), uuid=262, solved=0), )] (%8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8565:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=261), constant:[0]]) -> (%8566:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=262)]) + linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=262, 
solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=257, solved=0), inputs_2:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=259, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=259, solved=0), )] (%8566:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=262)], %8561:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=257)], %8564:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=259)]) -> (%8567:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=259)]) + linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=259, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=263, solved=0), )] (%8567:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=259)]) -> (%8568:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=263)]) + linalg.CPU.MatMulOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=263, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=36, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=264, solved=0), )] (%8568:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=263)], %8558:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=36)]) -> (%8569:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=264)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=264, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=264, solved=0), )] (%8569:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=264)]) -> (%8570:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=264)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=264, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=264, solved=0), )] (%8570:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=264)]) -> (%8570:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=264)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=264, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=266, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=265, solved=0))] (%8570:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=264)]) -> (%8571:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=266)]) + cf.ReturnOp (%8571:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=266)], %8551:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=254)], %8553:tensor<[1, 8, 32, 128], UInt8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=256)]) -> () } } graph.SubGraphOp @model.layers.5.mlp [using_qnn:true, symbol:model.layers.5.mlp] { - (%8315:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=261)]) -> (%8320:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=269)]) { - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=261), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=264), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=263))] (%8315:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=261)]) -> (%8316:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=264)]) - linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=264), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=265), )] (%8316:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=264)]) -> (%8317:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 
-32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=265)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=261), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=267), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=266))] (%8315:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=261)]) -> (%8318:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=267)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=265), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=267), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=265), )] (%8317:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=265)], %8318:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=267)]) -> (%8319:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=265)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 
32767, quant_to_type: Int16, scale_type: Float32), uuid=265), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=269), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=268))] (%8319:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=265)]) -> (%8320:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=269)]) - cf.ReturnOp (%8320:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=269)]) -> () + (%8573:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=267)]) -> (%8579:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=275)]) { + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=267, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=270, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=269, solved=0))] (%8573:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, 
quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=267)]) -> (%8574:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=270)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=267, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=272, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=271, solved=0))] (%8573:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=267)]) -> (%8575:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=272)]) + linalg.CPU.SigmoidOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=272, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=273, solved=0), )] (%8575:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=272)]) -> (%8576:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), 
uuid=273)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=272, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=273, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=272, solved=0), )] (%8575:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=272)], %8576:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=273)]) -> (%8577:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=272)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=272, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=270, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=272, solved=0), )] (%8577:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=272)], %8574:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, 
scale_type: Float32, zero_point_type: Int32), uuid=270)]) -> (%8578:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=272)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=272, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=275, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=274, solved=0))] (%8578:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=272)]) -> (%8579:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=275)]) + cf.ReturnOp (%8579:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=275)]) -> () } } graph.SubGraphOp @model.layers.6 [using_qnn:true, symbol:model.layers.6] { - (%8321:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=269)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8027:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=9)], %8028:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=37)]) -> (%8362:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=303)], %8335:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=282)], %8337:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=284)]) { - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=269), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=270), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=271))] (%8321:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=269)]) -> (%8322:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=270)]) - graph.CallGraphOp @model.layers.6.self_attn (%8322:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=270)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8027:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=9)], %8028:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=37)]) -> (%8354:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=294)], %8335:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=282)], %8337:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=284)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=294), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=269), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=294), )] (%8354:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=294)], 
%8321:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=269)]) -> (%8355:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=294)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=294), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=295), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=296))] (%8355:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=294)]) -> (%8356:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=295)]) - graph.CallGraphOp @model.layers.6.mlp (%8356:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=295)]) -> (%8361:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=303)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=303), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=294), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=303), )] (%8361:tensor<[1, 
32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=303)], %8355:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=294)]) -> (%8362:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=303)]) - cf.ReturnOp (%8362:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=303)], %8335:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=282)], %8337:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=284)]) -> () + (%8580:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8220:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=9)], %8221:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, 
quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=37)]) -> (%8632:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8603:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=289)], %8605:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=291)]) { + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=276, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=277, solved=0))] (%8580:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%8581:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=276)]) + graph.CallGraphOp @model.layers.6.self_attn (%8581:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=276)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 
128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8220:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=9)], %8221:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=37)]) -> (%8623:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=301)], %8603:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=289)], %8605:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=291)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=301, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%8580:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8623:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: 
UInt16, scale_type: Float32, zero_point_type: Int32), uuid=301)]) -> (%8624:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=302, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=303, solved=0))] (%8624:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%8625:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=302)]) + graph.CallGraphOp @model.layers.6.mlp (%8625:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=302)]) -> (%8631:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=310)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=310, solved=0), 
outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%8624:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8631:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=310)]) -> (%8632:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + cf.ReturnOp (%8632:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8603:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=289)], %8605:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=291)]) -> () } } graph.SubGraphOp @model.layers.6.self_attn [using_qnn:true, symbol:model.layers.6.self_attn] { - (%8322:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=270)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, 
CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8027:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=9)], %8028:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=37)]) -> (%8354:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=294)], %8335:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=282)], %8337:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=284)]) { - linalg.CPU.LinearOp (%8322:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=270)]) -> (%8323:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=276)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=270), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=273), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=272))] (%8322:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=270)]) -> (%8324:tensor<[1, 32, 1024], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=273)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=270), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=275), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=274))] (%8322:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=270)]) -> (%8325:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=275)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=276), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=276), )] (%8323:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=276)]) -> (%8323:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=276)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=276), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=276), )] (%8323:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=276)]) -> (%8326:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=276)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=273), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=273), )] (%8324:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=273)]) -> (%8324:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=273)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=273), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=273), )] (%8324:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=273)]) -> (%8327:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=273)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=275), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=275), )] (%8325:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=275)]) -> (%8325:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=275)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=275), outputs_0:QuantSpec(SymPerTensor(quant_min: 
-32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=275), )] (%8325:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=275)]) -> (%8328:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=275)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=276), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=277), weight_weight:QuantSpec(Raw(type: Int16PerTensor), uuid=278))] (%8326:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=276)]) -> (%8329:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=277)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=273), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=279), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=280))] (%8327:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=273)]) -> (%8330:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=279)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: 
Float32), uuid=277), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=277), )] (%8329:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=277)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%8331:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=277)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=279), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=279), )] (%8330:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=279)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: 
Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%8332:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=279)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=279), outputs_0:QuantSpec(Raw(type: Float16), uuid=281), )] (%8332:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=279)]) -> (%8333:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=281)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=281), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=282), )] (%8333:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=281)]) -> (%8334:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=282)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=282), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=282), )] (%8334:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=282)]) -> (%8335:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, 
quant_to_type: Int8, scale_type: Float32), uuid=282)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=275), outputs_0:QuantSpec(Raw(type: Float16), uuid=283), )] (%8328:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=275)]) -> (%8336:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=283)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=283), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=284), )] (%8336:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=283)]) -> (%8337:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=284)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=9), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=282), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=9), )] (%8027:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=9)], %8335:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=282)]) -> (%8338:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: 
Float32), uuid=9)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=37), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=284), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=37), )] (%8028:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=37)], %8337:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=284)]) -> (%8339:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=37)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=9), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=9), )] (%8338:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=9)]) -> (%8340:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=9)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=37), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=37), )] (%8339:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 
-128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=37)]) -> (%8341:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=37)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=277), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=9), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=285), )] (%8331:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=277)], %8340:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=9)]) -> (%8342:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=285)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=285), inputs_1:QuantSpec(Raw(type: Float32), uuid=286), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=285), )] (%8342:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=285)], %8343:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=286), constant:[0.088388346]]) -> (%8344:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=285)]) - linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=285), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=287), )] (%8344:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=285)]) -> (%8345:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=287)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=287), inputs_1:QuantSpec(Raw(type: Int16), uuid=288), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=287), )] (%8345:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=287)], %8346:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=288), constant:[-20]]) -> (%8347:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=287)]) - linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=289), outputs_0:QuantSpec(Raw(type: UInt8), uuid=290), )] (%8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8348:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=289), constant:[0]]) -> (%8349:tensor<[1, 1, 32, 1024], UInt8, 
CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=290)]) - linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=290), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=285), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=287), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=287), )] (%8349:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=290)], %8344:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=285)], %8347:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=287)]) -> (%8350:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=287)]) - linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=287), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=291), )] (%8350:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=287)]) -> (%8351:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=291)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, 
scale_type: Float32), uuid=291), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=37), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=292), )] (%8351:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=291)], %8341:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=37)]) -> (%8352:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=292)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=292), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=292), )] (%8352:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=292)]) -> (%8353:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=292)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=292), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=292), )] (%8353:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=292)]) -> 
(%8353:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=292)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=292), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=294), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=293))] (%8353:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=292)]) -> (%8354:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=294)]) - cf.ReturnOp (%8354:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=294)], %8335:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=282)], %8337:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=284)]) -> () + (%8581:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=276)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8220:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=9)], %8221:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=37)]) -> (%8623:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=301)], %8603:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=289)], %8605:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=291)]) { + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=276, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=279, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=278, solved=0))] (%8581:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=276)]) -> (%8582:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, 
quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=279)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=276, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=281, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=280, solved=0))] (%8581:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=276)]) -> (%8583:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=281)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=276, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=283, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=282, solved=0))] (%8581:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=276)]) -> (%8584:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=283)]) + 
linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=279, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=279, solved=0), )] (%8582:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=279)]) -> (%8582:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=279)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=279, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=279, solved=0), )] (%8582:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=279)]) -> (%8585:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=279)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=281, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=281, solved=0), )] (%8583:tensor<[1, 32, 1024], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=281)]) -> (%8583:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=281)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=281, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=281, solved=0), )] (%8583:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=281)]) -> (%8586:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=281)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=283, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=283, solved=0), )] (%8584:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=283)]) -> (%8584:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=283)]) + linalg.CPU.TransposeOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=283, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=283, solved=0), )] (%8584:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=283)]) -> (%8587:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=283)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=279, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=284, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=285, solved=0))] (%8585:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=279)]) -> (%8588:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=284)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=281, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: 
Float32, zero_point_type: Int32), uuid=286, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=287, solved=0))] (%8586:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=281)]) -> (%8589:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=286)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), )] (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), )] (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) + linalg.CPU.SliceOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=284, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=284, solved=0), )] (%8588:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=284)]) -> (%8588:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=284)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=284, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=284, solved=0), )] (%8588:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=284)]) -> (%8588:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=284)]) + linalg.CPU.NegOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=284, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=284, solved=0), )] (%8588:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, 
quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=284)]) -> (%8590:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=284)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=284, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=284, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=284, solved=0), )] (%8590:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=284)], %8588:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=284)]) -> (%8591:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=284)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=284, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=284, solved=0), )] (%8591:tensor<[1, 16, 32, 128], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=284)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%8592:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=284)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=284, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=284, solved=0), )] (%8588:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=284)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8593:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=284)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=284, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=284, solved=0), 
outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=284, solved=0), )] (%8593:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=284)], %8592:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=284)]) -> (%8594:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=284)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=286, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=286, solved=0), )] (%8589:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=286)]) -> (%8589:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=286)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=286, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=286, solved=0), )] (%8589:tensor<[1, 8, 32, 128], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=286)]) -> (%8589:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=286)]) + linalg.CPU.NegOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=286, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=286, solved=0), )] (%8589:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=286)]) -> (%8595:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=286)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=286, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=286, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=286, solved=0), )] (%8595:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=286)], %8589:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, 
scale_type: Float32, zero_point_type: Int32), uuid=286)]) -> (%8596:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=286)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=286, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=286, solved=0), )] (%8596:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=286)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%8597:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=286)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=286, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=286, solved=0), )] (%8589:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=286)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8598:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=286)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=286, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=286, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=286, solved=0), )] (%8598:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=286)], %8597:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=286)]) -> (%8599:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=286)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=286, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=288, solved=0), )] (%8599:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=286)]) -> (%8600:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=288)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=288, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=289, solved=0), )] (%8600:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=288)]) -> (%8601:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=289)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=289, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=289, solved=0), )] (%8601:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=289)]) -> (%8603:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=289)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=283, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=290, solved=0), )] (%8587:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=283)]) -> (%8604:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=290)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), 
uuid=290, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=291, solved=0), )] (%8604:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=290)]) -> (%8605:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=291)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=9, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=289, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=9, solved=0), )] (%8220:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=9)], %8603:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=289)]) -> (%8607:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=9)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=37, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=291, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=37, solved=0), )] (%8221:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=37)], 
%8605:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=291)]) -> (%8608:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=37)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=9, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=9, solved=0), )] (%8607:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=9)]) -> (%8609:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=9)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=37, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=37, solved=0), )] (%8608:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=37)]) -> (%8610:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=37)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=284, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=9, 
solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=292, solved=0), )] (%8594:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=284)], %8609:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=9)]) -> (%8611:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=292)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=292, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=293, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=292, solved=0), )] (%8611:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=292)], %8612:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=293), constant:[0.088388346]]) -> (%8613:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=292)]) + linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, 
quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=292, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=294, solved=0), )] (%8613:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=292)]) -> (%8614:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=294)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=294, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=295, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=294, solved=0), )] (%8614:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=294)], %8615:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=295), constant:[-20]]) -> (%8616:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=294)]) + linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2, solved=0), inputs_1:QuantSpec(Raw(type: UInt16), uuid=296, solved=0), outputs_0:QuantSpec(Raw(type: 
UInt8), uuid=297, solved=0), )] (%8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8617:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=296), constant:[0]]) -> (%8618:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=297)]) + linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=297, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=292, solved=0), inputs_2:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=294, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=294, solved=0), )] (%8618:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=297)], %8613:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=292)], %8616:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=294)]) -> (%8619:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=294)]) + linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=294, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=298, solved=0), )] 
(%8619:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=294)]) -> (%8620:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=298)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=298, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=37, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=299, solved=0), )] (%8620:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=298)], %8610:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=37)]) -> (%8621:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=299)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=299, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=299, solved=0), )] (%8621:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: 
UInt16, scale_type: Float32, zero_point_type: Int32), uuid=299)]) -> (%8622:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=299)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=299, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=299, solved=0), )] (%8622:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=299)]) -> (%8622:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=299)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=299, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=301, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=300, solved=0))] (%8622:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=299)]) -> (%8623:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=301)]) + 
cf.ReturnOp (%8623:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=301)], %8603:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=289)], %8605:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=291)]) -> () } } graph.SubGraphOp @model.layers.6.mlp [using_qnn:true, symbol:model.layers.6.mlp] { - (%8356:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=295)]) -> (%8361:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=303)]) { - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=295), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=298), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=297))] (%8356:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=295)]) -> (%8357:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=298)]) - linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, 
scale_type: Float32), uuid=298), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=299), )] (%8357:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=298)]) -> (%8358:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=299)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=295), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=301), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=300))] (%8356:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=295)]) -> (%8359:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=301)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=299), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=301), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=299), )] (%8358:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=299)], %8359:tensor<[1, 32, 6144], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=301)]) -> (%8360:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=299)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=299), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=303), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=302))] (%8360:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=299)]) -> (%8361:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=303)]) - cf.ReturnOp (%8361:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=303)]) -> () + (%8625:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=302)]) -> (%8631:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=310)]) { + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=302, solved=0), 
outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=305, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=304, solved=0))] (%8625:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=302)]) -> (%8626:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=305)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=302, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=307, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=306, solved=0))] (%8625:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=302)]) -> (%8627:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=307)]) + linalg.CPU.SigmoidOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=307, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, 
scale_type: Float32, zero_point_type: Int32), uuid=308, solved=0), )] (%8627:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=307)]) -> (%8628:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=308)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=307, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=308, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=307, solved=0), )] (%8627:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=307)], %8628:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=308)]) -> (%8629:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=307)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=307, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=305, solved=0), 
outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=307, solved=0), )] (%8629:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=307)], %8626:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=305)]) -> (%8630:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=307)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=307, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=310, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=309, solved=0))] (%8630:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=307)]) -> (%8631:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=310)]) + cf.ReturnOp (%8631:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=310)]) -> () } } graph.SubGraphOp @model.layers.7 [using_qnn:true, 
symbol:model.layers.7] { - (%8362:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=303)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8029:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=10)], %8030:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=38)]) -> (%8403:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=337)], %8376:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=316)], %8378:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=318)]) { - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=303), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=304), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=305))] (%8362:tensor<[1, 32, 2048], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=303)]) -> (%8363:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=304)]) - graph.CallGraphOp @model.layers.7.self_attn (%8363:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=304)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8029:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=10)], %8030:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=38)]) -> (%8395:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=328)], %8376:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=316)], %8378:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=318)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 
32767, quant_to_type: Int16, scale_type: Float32), uuid=328), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=303), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=328), )] (%8395:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=328)], %8362:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=303)]) -> (%8396:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=328)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=328), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=329), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=330))] (%8396:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=328)]) -> (%8397:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=329)]) - graph.CallGraphOp @model.layers.7.mlp (%8397:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=329)]) -> (%8402:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=337)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=337), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=328), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=337), )] (%8402:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=337)], %8396:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=328)]) -> (%8403:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=337)]) - cf.ReturnOp (%8403:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=337)], %8376:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=316)], %8378:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=318)]) -> () + (%8632:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8222:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=10)], %8223:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=38)]) -> (%8684:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8655:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=324)], %8657:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=326)]) { + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=311, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=312, solved=0))] (%8632:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%8633:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, 
scale_type: Float32, zero_point_type: Int32), uuid=311)]) + graph.CallGraphOp @model.layers.7.self_attn (%8633:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=311)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8222:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=10)], %8223:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=38)]) -> (%8675:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=336)], %8655:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=324)], %8657:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=326)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=336, 
solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%8632:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8675:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=336)]) -> (%8676:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=337, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=338, solved=0))] (%8676:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%8677:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=337)]) + graph.CallGraphOp @model.layers.7.mlp (%8677:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=337)]) -> (%8683:tensor<[1, 32, 2048], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=345)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=345, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%8676:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8683:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=345)]) -> (%8684:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + cf.ReturnOp (%8684:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8655:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=324)], %8657:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=326)]) -> () } } graph.SubGraphOp @model.layers.7.self_attn [using_qnn:true, symbol:model.layers.7.self_attn] { - (%8363:tensor<[1, 32, 2048], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=304)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8029:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=10)], %8030:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=38)]) -> (%8395:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=328)], %8376:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=316)], %8378:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=318)]) { - linalg.CPU.LinearOp (%8363:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=304)]) -> (%8364:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=310)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=304), outputs_0:QuantSpec(SymPerTensor(quant_min: 
-32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=307), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=306))] (%8363:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=304)]) -> (%8365:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=307)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=304), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=309), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=308))] (%8363:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=304)]) -> (%8366:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=309)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=310), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=310), )] (%8364:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=310)]) -> (%8364:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=310)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=310), outputs_0:QuantSpec(Raw(type: 
Int16PerTensor), uuid=310), )] (%8364:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=310)]) -> (%8367:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=310)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=307), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=307), )] (%8365:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=307)]) -> (%8365:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=307)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=307), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=307), )] (%8365:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=307)]) -> (%8368:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=307)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=309), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=309), )] (%8366:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 
-32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=309)]) -> (%8366:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=309)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=309), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=309), )] (%8366:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=309)]) -> (%8369:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=309)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=310), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=311), weight_weight:QuantSpec(Raw(type: Int16PerTensor), uuid=312))] (%8367:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=310)]) -> (%8370:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=311)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=307), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=313), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=314))] (%8368:tensor<[1, 8, 32, 128], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=307)]) -> (%8371:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=313)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=311), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=311), )] (%8370:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=311)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%8372:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=311)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=313), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, 
scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=313), )] (%8371:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=313)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%8373:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=313)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=313), outputs_0:QuantSpec(Raw(type: Float16), uuid=315), )] (%8373:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=313)]) -> (%8374:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=315)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=315), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=316), )] (%8374:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=315)]) -> (%8375:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=316)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 
-128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=316), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=316), )] (%8375:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=316)]) -> (%8376:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=316)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=309), outputs_0:QuantSpec(Raw(type: Float16), uuid=317), )] (%8369:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=309)]) -> (%8377:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=317)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=317), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=318), )] (%8377:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=317)]) -> (%8378:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=318)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=10), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=316), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=10), )] 
(%8029:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=10)], %8376:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=316)]) -> (%8379:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=10)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=38), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=318), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=38), )] (%8030:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=38)], %8378:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=318)]) -> (%8380:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=38)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=10), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=10), )] (%8379:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=10)]) -> (%8381:tensor<[1, 16, 128, 1024], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=10)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=38), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=38), )] (%8380:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=38)]) -> (%8382:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=38)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=311), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=10), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=319), )] (%8372:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=311)], %8381:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=10)]) -> (%8383:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=319)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=319), inputs_1:QuantSpec(Raw(type: Float32), uuid=320), 
outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=319), )] (%8383:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=319)], %8384:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=320), constant:[0.088388346]]) -> (%8385:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=319)]) - linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=319), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=321), )] (%8385:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=319)]) -> (%8386:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=321)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=321), inputs_1:QuantSpec(Raw(type: Int16), uuid=322), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=321), )] (%8386:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=321)], %8387:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=322), constant:[-20]]) -> (%8388:tensor<[1, 16, 32, 1], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=321)]) - linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=323), outputs_0:QuantSpec(Raw(type: UInt8), uuid=324), )] (%8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8389:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=323), constant:[0]]) -> (%8390:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=324)]) - linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=324), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=319), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=321), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=321), )] (%8390:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=324)], %8385:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=319)], %8388:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=321)]) -> (%8391:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=321)]) - linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=321), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=325), )] (%8391:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=321)]) -> (%8392:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=325)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=325), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=38), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=326), )] (%8392:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=325)], %8382:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=38)]) -> (%8393:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=326)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=326), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=326), )] (%8393:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=326)]) -> (%8394:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=326)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=326), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=326), )] (%8394:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=326)]) -> (%8394:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=326)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=326), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=328), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=327))] (%8394:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=326)]) -> (%8395:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=328)]) - cf.ReturnOp (%8395:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=328)], %8376:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=316)], %8378:tensor<[1, 8, 32, 128], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=318)]) -> () + (%8633:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=311)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8222:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=10)], %8223:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=38)]) -> (%8675:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=336)], %8655:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=324)], %8657:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=326)]) { + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=311, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: 
Float32, zero_point_type: Int32), uuid=314, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=313, solved=0))] (%8633:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=311)]) -> (%8634:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=314)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=311, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=316, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=315, solved=0))] (%8633:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=311)]) -> (%8635:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=316)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=311, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=318, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, 
quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=317, solved=0))] (%8633:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=311)]) -> (%8636:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=318)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=314, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=314, solved=0), )] (%8634:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=314)]) -> (%8634:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=314)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=314, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=314, solved=0), )] (%8634:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=314)]) -> (%8637:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, 
quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=314)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=316, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=316, solved=0), )] (%8635:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=316)]) -> (%8635:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=316)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=316, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=316, solved=0), )] (%8635:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=316)]) -> (%8638:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=316)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=318, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=318, 
solved=0), )] (%8636:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=318)]) -> (%8636:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=318)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=318, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=318, solved=0), )] (%8636:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=318)]) -> (%8639:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=318)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=314, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=319, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=320, solved=0))] (%8637:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=314)]) -> (%8640:tensor<[1, 16, 32, 128], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=319)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=316, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=321, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=322, solved=0))] (%8638:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=316)]) -> (%8641:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=321)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), )] (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 
65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), )] (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=319, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=319, solved=0), )] (%8640:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=319)]) -> (%8640:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=319)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=319, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=319, solved=0), )] (%8640:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=319)]) -> (%8640:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=319)]) + 
linalg.CPU.NegOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=319, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=319, solved=0), )] (%8640:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=319)]) -> (%8642:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=319)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=319, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=319, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=319, solved=0), )] (%8642:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=319)], %8640:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=319)]) -> (%8643:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=319)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 
0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=319, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=319, solved=0), )] (%8643:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=319)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%8644:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=319)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=319, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=319, solved=0), )] (%8640:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=319)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8645:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: 
UInt16, scale_type: Float32, zero_point_type: Int32), uuid=319)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=319, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=319, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=319, solved=0), )] (%8645:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=319)], %8644:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=319)]) -> (%8646:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=319)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=321, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=321, solved=0), )] (%8641:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=321)]) -> (%8641:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=321)]) + linalg.CPU.SliceOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=321, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=321, solved=0), )] (%8641:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=321)]) -> (%8641:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=321)]) + linalg.CPU.NegOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=321, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=321, solved=0), )] (%8641:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=321)]) -> (%8647:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=321)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=321, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=321, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=321, solved=0), )] (%8647:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=321)], %8641:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=321)]) -> (%8648:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=321)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=321, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=321, solved=0), )] (%8648:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=321)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%8649:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=321)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=321, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 
65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=321, solved=0), )] (%8641:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=321)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8650:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=321)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=321, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=321, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=321, solved=0), )] (%8650:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=321)], %8649:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=321)]) -> (%8651:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=321)]) + linalg.CPU.CastTypeOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=321, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=323, solved=0), )] (%8651:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=321)]) -> (%8652:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=323)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=323, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=324, solved=0), )] (%8652:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=323)]) -> (%8653:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=324)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=324, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=324, solved=0), )] (%8653:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=324)]) -> (%8655:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=324)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=318, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=325, 
solved=0), )] (%8639:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=318)]) -> (%8656:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=325)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=325, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=326, solved=0), )] (%8656:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=325)]) -> (%8657:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=326)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=10, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=324, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=10, solved=0), )] (%8222:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=10)], %8655:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=324)]) -> (%8659:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=10)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=38, solved=0), 
inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=326, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=38, solved=0), )] (%8223:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=38)], %8657:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=326)]) -> (%8660:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=38)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=10, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=10, solved=0), )] (%8659:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=10)]) -> (%8661:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=10)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=38, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=38, solved=0), )] (%8660:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=38)]) -> (%8662:tensor<[1, 16, 1024, 128], UInt8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=38)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=319, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=10, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=327, solved=0), )] (%8646:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=319)], %8661:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=10)]) -> (%8663:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=327)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=327, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=328, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=327, solved=0), )] (%8663:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=327)], %8664:tensor<[1], Float32, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=328), constant:[0.088388346]]) -> (%8665:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=327)]) + linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=327, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=329, solved=0), )] (%8665:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=327)]) -> (%8666:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=329)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=329, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=330, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=329, solved=0), )] (%8666:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=329)], %8667:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, 
quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=330), constant:[-20]]) -> (%8668:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=329)]) + linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2, solved=0), inputs_1:QuantSpec(Raw(type: UInt16), uuid=331, solved=0), outputs_0:QuantSpec(Raw(type: UInt8), uuid=332, solved=0), )] (%8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8669:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=331), constant:[0]]) -> (%8670:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=332)]) + linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=332, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=327, solved=0), inputs_2:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=329, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=329, solved=0), )] (%8670:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=332)], %8665:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=327)], %8668:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=329)]) -> (%8671:tensor<[1, 16, 32, 1024], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=329)]) + linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=329, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=333, solved=0), )] (%8671:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=329)]) -> (%8672:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=333)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=333, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=38, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=334, solved=0), )] (%8672:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=333)], %8662:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=38)]) -> (%8673:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: 
Int32), uuid=334)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=334, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=334, solved=0), )] (%8673:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=334)]) -> (%8674:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=334)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=334, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=334, solved=0), )] (%8674:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=334)]) -> (%8674:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=334)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=334, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=336, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, 
block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=335, solved=0))] (%8674:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=334)]) -> (%8675:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=336)]) + cf.ReturnOp (%8675:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=336)], %8655:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=324)], %8657:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=326)]) -> () } } graph.SubGraphOp @model.layers.7.mlp [using_qnn:true, symbol:model.layers.7.mlp] { - (%8397:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=329)]) -> (%8402:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=337)]) { - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=329), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=332), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: 
Float32), uuid=331))] (%8397:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=329)]) -> (%8398:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=332)]) - linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=332), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=333), )] (%8398:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=332)]) -> (%8399:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=333)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=329), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=335), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=334))] (%8397:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=329)]) -> (%8400:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=335)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=333), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=335), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=333), )] (%8399:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=333)], %8400:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=335)]) -> (%8401:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=333)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=333), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=337), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=336))] (%8401:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=333)]) -> (%8402:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=337)]) - cf.ReturnOp (%8402:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=337)]) -> () + (%8677:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 
65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=337)]) -> (%8683:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=345)]) { + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=337, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=340, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=339, solved=0))] (%8677:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=337)]) -> (%8678:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=340)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=337, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=342, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=341, solved=0))] (%8677:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=337)]) -> 
(%8679:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=342)]) + linalg.CPU.SigmoidOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=342, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=343, solved=0), )] (%8679:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=342)]) -> (%8680:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=343)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=342, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=343, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=342, solved=0), )] (%8679:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=342)], %8680:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=343)]) -> (%8681:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, 
quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=342)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=342, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=340, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=342, solved=0), )] (%8681:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=342)], %8678:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=340)]) -> (%8682:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=342)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=342, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=345, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=344, solved=0))] (%8682:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=342)]) -> (%8683:tensor<[1, 32, 2048], 
UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=345)]) + cf.ReturnOp (%8683:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=345)]) -> () } } graph.SubGraphOp @model.layers.8 [using_qnn:true, symbol:model.layers.8] { - (%8403:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=337)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8031:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=11)], %8032:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=39)]) -> (%8444:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=371)], %8417:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=350)], %8419:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=352)]) { - 
linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=337), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=338), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=339))] (%8403:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=337)]) -> (%8404:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=338)]) - graph.CallGraphOp @model.layers.8.self_attn (%8404:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=338)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8031:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=11)], %8032:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=39)]) -> (%8436:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=362)], 
%8417:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=350)], %8419:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=352)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=362), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=337), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=362), )] (%8436:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=362)], %8403:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=337)]) -> (%8437:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=362)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=362), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=363), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=364))] (%8437:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=362)]) -> (%8438:tensor<[1, 32, 2048], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=363)]) - graph.CallGraphOp @model.layers.8.mlp (%8438:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=363)]) -> (%8443:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=371)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=371), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=362), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=371), )] (%8443:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=371)], %8437:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=362)]) -> (%8444:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=371)]) - cf.ReturnOp (%8444:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=371)], %8417:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=350)], %8419:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: 
Int8, scale_type: Float32), uuid=352)]) -> () + (%8684:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8224:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=11)], %8225:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=39)]) -> (%8736:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8707:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=359)], %8709:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=361)]) { + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=346, solved=0), 
weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=347, solved=0))] (%8684:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%8685:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=346)]) + graph.CallGraphOp @model.layers.8.self_attn (%8685:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=346)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8224:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=11)], %8225:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=39)]) -> (%8727:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=371)], %8707:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=359)], 
%8709:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=361)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=371, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%8684:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8727:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=371)]) -> (%8728:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=372, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=373, solved=0))] (%8728:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: 
Int32), uuid=60)]) -> (%8729:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=372)]) + graph.CallGraphOp @model.layers.8.mlp (%8729:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=372)]) -> (%8735:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=380)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=380, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%8728:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8735:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=380)]) -> (%8736:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + cf.ReturnOp (%8736:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), 
uuid=60)], %8707:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=359)], %8709:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=361)]) -> () } } graph.SubGraphOp @model.layers.8.self_attn [using_qnn:true, symbol:model.layers.8.self_attn] { - (%8404:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=338)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8031:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=11)], %8032:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=39)]) -> (%8436:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=362)], %8417:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=350)], %8419:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=352)]) { - linalg.CPU.LinearOp 
(%8404:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=338)]) -> (%8405:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=344)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=338), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=341), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=340))] (%8404:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=338)]) -> (%8406:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=341)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=338), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=343), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=342))] (%8404:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=338)]) -> (%8407:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=343)]) - linalg.CPU.ViewOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=344), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=344), )] (%8405:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=344)]) -> (%8405:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=344)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=344), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=344), )] (%8405:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=344)]) -> (%8408:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=344)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=341), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=341), )] (%8406:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=341)]) -> (%8406:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=341)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=341), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=341), )] (%8406:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=341)]) -> (%8409:tensor<[1, 8, 32, 128], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=341)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=343), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=343), )] (%8407:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=343)]) -> (%8407:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=343)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=343), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=343), )] (%8407:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=343)]) -> (%8410:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=343)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=344), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=345), weight_weight:QuantSpec(Raw(type: Int16PerTensor), uuid=346))] (%8408:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=344)]) -> (%8411:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=345)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=341), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=347), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=348))] (%8409:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=341)]) -> (%8412:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=347)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=345), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=345), )] (%8411:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=345)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%8413:tensor<[1, 16, 32, 128], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=345)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=347), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=347), )] (%8412:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=347)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%8414:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=347)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=347), outputs_0:QuantSpec(Raw(type: Float16), uuid=349), )] (%8414:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=347)]) -> (%8415:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=349)]) - linalg.CPU.CastTypeOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=349), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=350), )] (%8415:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=349)]) -> (%8416:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=350)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=350), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=350), )] (%8416:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=350)]) -> (%8417:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=350)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=343), outputs_0:QuantSpec(Raw(type: Float16), uuid=351), )] (%8410:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=343)]) -> (%8418:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=351)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=351), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=352), )] (%8418:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=351)]) -> (%8419:tensor<[1, 8, 
32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=352)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=11), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=350), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=11), )] (%8031:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=11)], %8417:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=350)]) -> (%8420:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=11)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=39), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=352), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=39), )] (%8032:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=39)], %8419:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=352)]) -> (%8421:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, 
scale_type: Float32), uuid=39)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=11), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=11), )] (%8420:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=11)]) -> (%8422:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=11)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=39), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=39), )] (%8421:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=39)]) -> (%8423:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=39)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=345), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=11), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=353), )] (%8413:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=345)], %8422:tensor<[1, 16, 128, 1024], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=11)]) -> (%8424:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=353)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=353), inputs_1:QuantSpec(Raw(type: Float32), uuid=354), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=353), )] (%8424:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=353)], %8425:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=354), constant:[0.088388346]]) -> (%8426:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=353)]) - linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=353), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=355), )] (%8426:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=353)]) -> (%8427:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=355)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=355), 
inputs_1:QuantSpec(Raw(type: Int16), uuid=356), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=355), )] (%8427:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=355)], %8428:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=356), constant:[-20]]) -> (%8429:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=355)]) - linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=357), outputs_0:QuantSpec(Raw(type: UInt8), uuid=358), )] (%8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8430:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=357), constant:[1]]) -> (%8431:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=358)]) - linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=358), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=353), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=355), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=355), )] (%8431:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=358)], %8426:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=353)], %8429:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=355)]) -> (%8432:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=355)]) - linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=355), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=359), )] (%8432:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=355)]) -> (%8433:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=359)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=359), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=39), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=360), )] (%8433:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=359)], %8423:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=39)]) -> (%8434:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=360)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 
-32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=360), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=360), )] (%8434:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=360)]) -> (%8435:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=360)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=360), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=360), )] (%8435:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=360)]) -> (%8435:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=360)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=360), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=362), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=361))] (%8435:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=360)]) -> (%8436:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 
-32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=362)]) - cf.ReturnOp (%8436:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=362)], %8417:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=350)], %8419:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=352)]) -> () + (%8685:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=346)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8224:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=11)], %8225:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=39)]) -> (%8727:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=371)], %8707:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), 
uuid=359)], %8709:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=361)]) { + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=346, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=349, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=348, solved=0))] (%8685:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=346)]) -> (%8686:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=349)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=346, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=351, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=350, solved=0))] (%8685:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=346)]) -> (%8687:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 
65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=351)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=346, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=353, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=352, solved=0))] (%8685:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=346)]) -> (%8688:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=353)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=349, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=349, solved=0), )] (%8686:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=349)]) -> (%8686:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=349)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), 
uuid=349, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=349, solved=0), )] (%8686:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=349)]) -> (%8689:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=349)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=351, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=351, solved=0), )] (%8687:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=351)]) -> (%8687:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=351)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=351, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=351, solved=0), )] (%8687:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=351)]) -> (%8690:tensor<[1, 8, 32, 128], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=351)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=353, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=353, solved=0), )] (%8688:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=353)]) -> (%8688:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=353)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=353, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=353, solved=0), )] (%8688:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=353)]) -> (%8691:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=353)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=349, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: 
UInt16, scale_type: Float32, zero_point_type: Int32), uuid=354, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=355, solved=0))] (%8689:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=349)]) -> (%8692:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=354)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=351, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=356, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=357, solved=0))] (%8690:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=351)]) -> (%8693:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=356)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), )] (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, 
quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), )] (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=354, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=354, solved=0), )] (%8692:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=354)]) -> (%8692:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=354)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=354, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, 
scale_type: Float32, zero_point_type: Int32), uuid=354, solved=0), )] (%8692:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=354)]) -> (%8692:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=354)]) + linalg.CPU.NegOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=354, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=354, solved=0), )] (%8692:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=354)]) -> (%8694:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=354)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=354, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=354, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=354, solved=0), )] (%8694:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=354)], %8692:tensor<[1, 16, 
32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=354)]) -> (%8695:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=354)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=354, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=354, solved=0), )] (%8695:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=354)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%8696:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=354)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=354, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=354, solved=0), )] (%8692:tensor<[1, 16, 32, 128], 
UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=354)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8697:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=354)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=354, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=354, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=354, solved=0), )] (%8697:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=354)], %8696:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=354)]) -> (%8698:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=354)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=356, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: 
Float32, zero_point_type: Int32), uuid=356, solved=0), )] (%8693:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=356)]) -> (%8693:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=356)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=356, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=356, solved=0), )] (%8693:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=356)]) -> (%8693:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=356)]) + linalg.CPU.NegOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=356, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=356, solved=0), )] (%8693:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=356)]) -> (%8699:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), 
uuid=356)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=356, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=356, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=356, solved=0), )] (%8699:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=356)], %8693:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=356)]) -> (%8700:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=356)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=356, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=356, solved=0), )] (%8700:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=356)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: 
Float32), uuid=63)]) -> (%8701:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=356)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=356, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=356, solved=0), )] (%8693:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=356)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8702:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=356)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=356, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=356, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=356, solved=0), )] (%8702:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=356)], %8701:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=356)]) -> (%8703:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=356)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=356, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=358, solved=0), )] (%8703:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=356)]) -> (%8704:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=358)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=358, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=359, solved=0), )] (%8704:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=358)]) -> (%8705:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=359)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=359, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=359, solved=0), )] (%8705:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, 
scale_type: Float32), uuid=359)]) -> (%8707:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=359)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=353, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=360, solved=0), )] (%8691:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=353)]) -> (%8708:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=360)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=360, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=361, solved=0), )] (%8708:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=360)]) -> (%8709:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=361)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=11, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=359, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=11, solved=0), )] (%8224:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=11)], %8707:tensor<[1, 8, 128, 32], UInt8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=359)]) -> (%8711:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=11)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=39, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=361, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=39, solved=0), )] (%8225:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=39)], %8709:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=361)]) -> (%8712:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=39)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=11, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=11, solved=0), )] (%8711:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=11)]) -> (%8713:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=11)]) + linalg.CPU.RepeatOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=39, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=39, solved=0), )] (%8712:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=39)]) -> (%8714:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=39)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=354, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=11, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=362, solved=0), )] (%8698:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=354)], %8713:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=11)]) -> (%8715:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=362)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=362, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, 
quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=363, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=362, solved=0), )] (%8715:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=362)], %8716:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=363), constant:[0.088388346]]) -> (%8717:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=362)]) + linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=362, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=364, solved=0), )] (%8717:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=362)]) -> (%8718:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=364)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=364, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=365, 
solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=364, solved=0), )] (%8718:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=364)], %8719:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=365), constant:[-20]]) -> (%8720:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=364)]) + linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2, solved=0), inputs_1:QuantSpec(Raw(type: UInt16), uuid=366, solved=0), outputs_0:QuantSpec(Raw(type: UInt8), uuid=367, solved=0), )] (%8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8721:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=366), constant:[0]]) -> (%8722:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=367)]) + linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=367, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=362, solved=0), inputs_2:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=364, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=364, solved=0), )] (%8722:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=367)], %8717:tensor<[1, 16, 32, 
1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=362)], %8720:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=364)]) -> (%8723:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=364)]) + linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=364, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=368, solved=0), )] (%8723:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=364)]) -> (%8724:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=368)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=368, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=39, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=369, solved=0), )] (%8724:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: 
UInt16, scale_type: Float32, zero_point_type: Int32), uuid=368)], %8714:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=39)]) -> (%8725:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=369)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=369, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=369, solved=0), )] (%8725:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=369)]) -> (%8726:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=369)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=369, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=369, solved=0), )] (%8726:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=369)]) -> (%8726:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=369)]) 
+ linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=369, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=371, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=370, solved=0))] (%8726:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=369)]) -> (%8727:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=371)]) + cf.ReturnOp (%8727:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=371)], %8707:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=359)], %8709:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=361)]) -> () } } graph.SubGraphOp @model.layers.8.mlp [using_qnn:true, symbol:model.layers.8.mlp] { - (%8438:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=363)]) -> (%8443:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=371)]) { - linalg.CPU.LinearOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=363), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=366), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=365))] (%8438:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=363)]) -> (%8439:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=366)]) - linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=366), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=367), )] (%8439:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=366)]) -> (%8440:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=367)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=363), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=369), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=368))] (%8438:tensor<[1, 32, 2048], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=363)]) -> (%8441:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=369)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=367), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=369), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=367), )] (%8440:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=367)], %8441:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=369)]) -> (%8442:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=367)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=367), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=371), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=370))] (%8442:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=367)]) -> (%8443:tensor<[1, 32, 2048], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=371)]) - cf.ReturnOp (%8443:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=371)]) -> () + (%8729:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=372)]) -> (%8735:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=380)]) { + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=372, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=375, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=374, solved=0))] (%8729:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=372)]) -> (%8730:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=375)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=372, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: 
Float32, zero_point_type: Int32), uuid=377, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=376, solved=0))] (%8729:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=372)]) -> (%8731:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=377)]) + linalg.CPU.SigmoidOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=377, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=378, solved=0), )] (%8731:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=377)]) -> (%8732:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=378)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=377, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=378, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=377, solved=0), )] (%8731:tensor<[1, 32, 6144], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=377)], %8732:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=378)]) -> (%8733:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=377)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=377, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=375, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=377, solved=0), )] (%8733:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=377)], %8730:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=375)]) -> (%8734:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=377)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=377, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=380, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=379, solved=0))] (%8734:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=377)]) -> (%8735:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=380)]) + cf.ReturnOp (%8735:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=380)]) -> () } } graph.SubGraphOp @model.layers.9 [using_qnn:true, symbol:model.layers.9] { - (%8444:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=371)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8033:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=12)], %8034:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=40)]) -> (%8485:tensor<[1, 32, 2048], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=405)], %8458:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=384)], %8460:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=386)]) { - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=371), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=372), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=373))] (%8444:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=371)]) -> (%8445:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=372)]) - graph.CallGraphOp @model.layers.9.self_attn (%8445:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=372)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8033:tensor<[1, 8, 128, 992], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=12)], %8034:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=40)]) -> (%8477:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=396)], %8458:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=384)], %8460:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=386)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=396), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=371), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=396), )] (%8477:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=396)], %8444:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=371)]) -> (%8478:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=396)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=396), 
outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=397), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=398))] (%8478:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=396)]) -> (%8479:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=397)]) - graph.CallGraphOp @model.layers.9.mlp (%8479:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=397)]) -> (%8484:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=405)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=405), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=396), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=405), )] (%8484:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=405)], %8478:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=396)]) -> (%8485:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=405)]) - cf.ReturnOp 
(%8485:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=405)], %8458:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=384)], %8460:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=386)]) -> () + (%8736:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8226:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=12)], %8227:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=40)]) -> (%8788:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8759:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=394)], %8761:tensor<[1, 8, 32, 128], UInt8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=396)]) { + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=381, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=382, solved=0))] (%8736:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%8737:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=381)]) + graph.CallGraphOp @model.layers.9.self_attn (%8737:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=381)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8226:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=12)], %8227:tensor<[1, 8, 992, 128], UInt8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=40)]) -> (%8779:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=406)], %8759:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=394)], %8761:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=396)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=406, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%8736:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8779:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=406)]) -> (%8780:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, 
solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=407, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=408, solved=0))] (%8780:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%8781:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=407)]) + graph.CallGraphOp @model.layers.9.mlp (%8781:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=407)]) -> (%8787:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=415)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=415, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%8780:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8787:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, 
quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=415)]) -> (%8788:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + cf.ReturnOp (%8788:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8759:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=394)], %8761:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=396)]) -> () } } graph.SubGraphOp @model.layers.9.self_attn [using_qnn:true, symbol:model.layers.9.self_attn] { - (%8445:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=372)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8033:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=12)], %8034:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=40)]) -> (%8477:tensor<[1, 32, 2048], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=396)], %8458:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=384)], %8460:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=386)]) { - linalg.CPU.LinearOp (%8445:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=372)]) -> (%8446:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=378)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=372), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=375), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=374))] (%8445:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=372)]) -> (%8447:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=375)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=372), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=377), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, 
block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=376))] (%8445:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=372)]) -> (%8448:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=377)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=378), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=378), )] (%8446:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=378)]) -> (%8446:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=378)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=378), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=378), )] (%8446:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=378)]) -> (%8449:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=378)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=375), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=375), )] (%8447:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=375)]) -> (%8447:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=375)]) - linalg.CPU.TransposeOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=375), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=375), )] (%8447:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=375)]) -> (%8450:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=375)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=377), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=377), )] (%8448:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=377)]) -> (%8448:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=377)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=377), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=377), )] (%8448:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=377)]) -> (%8451:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=377)]) - 
linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=378), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=379), weight_weight:QuantSpec(Raw(type: Int16PerTensor), uuid=380))] (%8449:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=378)]) -> (%8452:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=379)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=375), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=381), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=382))] (%8450:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=375)]) -> (%8453:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=381)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=379), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=379), )] (%8452:tensor<[1, 16, 32, 128], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=379)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%8454:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=379)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=381), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=381), )] (%8453:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=381)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%8455:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=381)]) - linalg.CPU.CastTypeOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=381), outputs_0:QuantSpec(Raw(type: Float16), uuid=383), )] (%8455:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=381)]) -> (%8456:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=383)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=383), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=384), )] (%8456:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=383)]) -> (%8457:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=384)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=384), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=384), )] (%8457:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=384)]) -> (%8458:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=384)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=377), outputs_0:QuantSpec(Raw(type: Float16), uuid=385), )] (%8451:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 
32767, quant_to_type: Int16, scale_type: Float32), uuid=377)]) -> (%8459:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=385)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=385), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=386), )] (%8459:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=385)]) -> (%8460:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=386)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=12), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=384), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=12), )] (%8033:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=12)], %8458:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=384)]) -> (%8461:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=12)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=40), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=386), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: 
Float32), uuid=40), )] (%8034:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=40)], %8460:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=386)]) -> (%8462:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=40)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=12), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=12), )] (%8461:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=12)]) -> (%8463:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=12)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=40), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=40), )] (%8462:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=40)]) -> (%8464:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=40)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, 
scale_type: Float32), uuid=379), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=12), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=387), )] (%8454:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=379)], %8463:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=12)]) -> (%8465:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=387)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=387), inputs_1:QuantSpec(Raw(type: Float32), uuid=388), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=387), )] (%8465:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=387)], %8466:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=388), constant:[0.088388346]]) -> (%8467:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=387)]) - linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=387), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=389), )] (%8467:tensor<[1, 16, 32, 1024], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=387)]) -> (%8468:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=389)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=389), inputs_1:QuantSpec(Raw(type: Int16), uuid=390), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=389), )] (%8468:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=389)], %8469:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=390), constant:[-20]]) -> (%8470:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=389)]) - linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=391), outputs_0:QuantSpec(Raw(type: UInt8), uuid=392), )] (%8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8471:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=391), constant:[-0.1796875]]) -> (%8472:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=392)]) - linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=392), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=387), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=389), 
outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=389), )] (%8472:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=392)], %8467:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=387)], %8470:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=389)]) -> (%8473:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=389)]) - linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=389), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=393), )] (%8473:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=389)]) -> (%8474:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=393)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=393), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=40), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=394), )] (%8474:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=393)], %8464:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=40)]) -> (%8475:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=394)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=394), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=394), )] (%8475:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=394)]) -> (%8476:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=394)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=394), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=394), )] (%8476:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=394)]) -> (%8476:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=394)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=394), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 
32767, quant_to_type: Int16, scale_type: Float32), uuid=396), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=395))] (%8476:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=394)]) -> (%8477:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=396)]) - cf.ReturnOp (%8477:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=396)], %8458:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=384)], %8460:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=386)]) -> () + (%8737:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=381)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8226:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=12)], %8227:tensor<[1, 8, 992, 128], 
UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=40)]) -> (%8779:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=406)], %8759:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=394)], %8761:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=396)]) { + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=381, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=384, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=383, solved=0))] (%8737:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=381)]) -> (%8738:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=384)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=381, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=386, 
solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=385, solved=0))] (%8737:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=381)]) -> (%8739:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=386)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=381, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=388, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=387, solved=0))] (%8737:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=381)]) -> (%8740:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=388)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=384, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=384, solved=0), )] (%8738:tensor<[1, 32, 2048], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=384)]) -> (%8738:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=384)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=384, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=384, solved=0), )] (%8738:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=384)]) -> (%8741:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=384)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=386, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=386, solved=0), )] (%8739:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=386)]) -> (%8739:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=386)]) + linalg.CPU.TransposeOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=386, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=386, solved=0), )] (%8739:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=386)]) -> (%8742:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=386)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=388, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=388, solved=0), )] (%8740:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=388)]) -> (%8740:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=388)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=388, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=388, solved=0), )] (%8740:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, 
quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=388)]) -> (%8743:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=388)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=384, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=389, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=390, solved=0))] (%8741:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=384)]) -> (%8744:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=389)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=386, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=391, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=392, solved=0))] (%8742:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=386)]) -> (%8745:tensor<[1, 8, 32, 
128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=391)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), )] (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), )] (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=389, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=389, solved=0), )] (%8744:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 
65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=389)]) -> (%8744:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=389)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=389, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=389, solved=0), )] (%8744:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=389)]) -> (%8744:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=389)]) + linalg.CPU.NegOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=389, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=389, solved=0), )] (%8744:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=389)]) -> (%8746:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=389)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: 
Float32, zero_point_type: Int32), uuid=389, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=389, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=389, solved=0), )] (%8746:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=389)], %8744:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=389)]) -> (%8747:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=389)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=389, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=389, solved=0), )] (%8747:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=389)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%8748:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: 
UInt16, scale_type: Float32, zero_point_type: Int32), uuid=389)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=389, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=389, solved=0), )] (%8744:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=389)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8749:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=389)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=389, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=389, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=389, solved=0), )] (%8749:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=389)], %8748:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, 
quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=389)]) -> (%8750:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=389)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=391, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=391, solved=0), )] (%8745:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=391)]) -> (%8745:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=391)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=391, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=391, solved=0), )] (%8745:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=391)]) -> (%8745:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=391)]) + linalg.CPU.NegOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=391, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=391, solved=0), )] (%8745:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=391)]) -> (%8751:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=391)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=391, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=391, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=391, solved=0), )] (%8751:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=391)], %8745:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=391)]) -> (%8752:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=391)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=391, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, 
quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=391, solved=0), )] (%8752:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=391)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%8753:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=391)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=391, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=391, solved=0), )] (%8745:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=391)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8754:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=391)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, 
quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=391, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=391, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=391, solved=0), )] (%8754:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=391)], %8753:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=391)]) -> (%8755:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=391)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=391, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=393, solved=0), )] (%8755:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=391)]) -> (%8756:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=393)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=393, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=394, solved=0), )] (%8756:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=393)]) -> (%8757:tensor<[1, 8, 32, 128], 
UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=394)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=394, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=394, solved=0), )] (%8757:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=394)]) -> (%8759:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=394)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=388, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=395, solved=0), )] (%8743:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=388)]) -> (%8760:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=395)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=395, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=396, solved=0), )] (%8760:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=395)]) -> (%8761:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=396)]) + linalg.CPU.ConcatOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=12, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=394, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=12, solved=0), )] (%8226:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=12)], %8759:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=394)]) -> (%8763:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=12)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=40, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=396, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=40, solved=0), )] (%8227:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=40)], %8761:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=396)]) -> (%8764:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=40)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 
-128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=12, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=12, solved=0), )] (%8763:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=12)]) -> (%8765:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=12)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=40, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=40, solved=0), )] (%8764:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=40)]) -> (%8766:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=40)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=389, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=12, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=397, solved=0), )] (%8750:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=389)], %8765:tensor<[1, 16, 128, 1024], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=12)]) -> (%8767:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=397)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=397, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=398, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=397, solved=0), )] (%8767:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=397)], %8768:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=398), constant:[0.088388346]]) -> (%8769:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=397)]) + linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=397, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=399, solved=0), )] (%8769:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, 
scale_type: Float32, zero_point_type: Int32), uuid=397)]) -> (%8770:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=399)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=399, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=400, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=399, solved=0), )] (%8770:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=399)], %8771:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=400), constant:[-20]]) -> (%8772:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=399)]) + linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2, solved=0), inputs_1:QuantSpec(Raw(type: UInt16), uuid=401, solved=0), outputs_0:QuantSpec(Raw(type: UInt8), uuid=402, solved=0), )] (%8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8773:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=401), constant:[0]]) -> (%8774:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=402)]) + linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), 
uuid=402, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=397, solved=0), inputs_2:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=399, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=399, solved=0), )] (%8774:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=402)], %8769:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=397)], %8772:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=399)]) -> (%8775:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=399)]) + linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=399, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=403, solved=0), )] (%8775:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=399)]) -> (%8776:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=403)]) + 
linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=403, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=40, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=404, solved=0), )] (%8776:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=403)], %8766:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=40)]) -> (%8777:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=404)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=404, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=404, solved=0), )] (%8777:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=404)]) -> (%8778:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=404)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, 
scale_type: Float32, zero_point_type: Int32), uuid=404, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=404, solved=0), )] (%8778:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=404)]) -> (%8778:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=404)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=404, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=406, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=405, solved=0))] (%8778:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=404)]) -> (%8779:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=406)]) + cf.ReturnOp (%8779:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=406)], %8759:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=394)], %8761:tensor<[1, 8, 32, 
128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=396)]) -> () } } graph.SubGraphOp @model.layers.9.mlp [using_qnn:true, symbol:model.layers.9.mlp] { - (%8479:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=397)]) -> (%8484:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=405)]) { - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=397), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=400), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=399))] (%8479:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=397)]) -> (%8480:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=400)]) - linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=400), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=401), )] (%8480:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=400)]) -> (%8481:tensor<[1, 32, 6144], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=401)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=397), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=403), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=402))] (%8479:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=397)]) -> (%8482:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=403)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=401), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=403), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=401), )] (%8481:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=401)], %8482:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=403)]) -> (%8483:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=401)]) - linalg.CPU.LinearOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=401), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=405), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=404))] (%8483:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=401)]) -> (%8484:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=405)]) - cf.ReturnOp (%8484:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=405)]) -> () + (%8781:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=407)]) -> (%8787:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=415)]) { + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=407, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=410, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=409, solved=0))] (%8781:tensor<[1, 32, 2048], 
UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=407)]) -> (%8782:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=410)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=407, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=412, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=411, solved=0))] (%8781:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=407)]) -> (%8783:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=412)]) + linalg.CPU.SigmoidOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=412, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=413, solved=0), )] (%8783:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=412)]) -> (%8784:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, 
quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=413)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=412, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=413, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=412, solved=0), )] (%8783:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=412)], %8784:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=413)]) -> (%8785:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=412)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=412, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=410, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=412, solved=0), )] (%8785:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=412)], %8782:tensor<[1, 32, 6144], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=410)]) -> (%8786:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=412)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=412, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=415, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=414, solved=0))] (%8786:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=412)]) -> (%8787:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=415)]) + cf.ReturnOp (%8787:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=415)]) -> () } } graph.SubGraphOp @model.layers.10 [using_qnn:true, symbol:model.layers.10] { - (%8485:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=405)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8035:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=13)], %8036:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=41)]) -> (%8526:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=439)], %8499:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=418)], %8501:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=420)]) { - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=405), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=406), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=407))] (%8485:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=405)]) -> (%8486:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=406)]) - graph.CallGraphOp @model.layers.10.self_attn (%8486:tensor<[1, 32, 2048], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=406)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8035:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=13)], %8036:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=41)]) -> (%8518:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=430)], %8499:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=418)], %8501:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=420)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=430), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=405), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=430), )] (%8518:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 
-32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=430)], %8485:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=405)]) -> (%8519:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=430)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=430), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=431), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=432))] (%8519:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=430)]) -> (%8520:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=431)]) - graph.CallGraphOp @model.layers.10.mlp (%8520:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=431)]) -> (%8525:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=439)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=439), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=430), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 
32767, quant_to_type: Int16, scale_type: Float32), uuid=439), )] (%8525:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=439)], %8519:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=430)]) -> (%8526:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=439)]) - cf.ReturnOp (%8526:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=439)], %8499:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=418)], %8501:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=420)]) -> () + (%8788:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8228:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=13)], %8229:tensor<[1, 8, 992, 
128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=41)]) -> (%8840:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8811:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=429)], %8813:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=431)]) { + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=416, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=417, solved=0))] (%8788:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%8789:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=416)]) + graph.CallGraphOp @model.layers.10.self_attn (%8789:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=416)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, 
quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8228:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=13)], %8229:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=41)]) -> (%8831:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=441)], %8811:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=429)], %8813:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=431)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=441, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%8788:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8831:tensor<[1, 32, 2048], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=441)]) -> (%8832:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=442, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=443, solved=0))] (%8832:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%8833:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=442)]) + graph.CallGraphOp @model.layers.10.mlp (%8833:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=442)]) -> (%8839:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=450)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, 
quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=450, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%8832:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8839:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=450)]) -> (%8840:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + cf.ReturnOp (%8840:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8811:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=429)], %8813:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=431)]) -> () } } graph.SubGraphOp @model.layers.10.self_attn [using_qnn:true, symbol:model.layers.10.self_attn] { - (%8486:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=406)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8035:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=13)], %8036:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=41)]) -> (%8518:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=430)], %8499:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=418)], %8501:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=420)]) { - linalg.CPU.LinearOp (%8486:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=406)]) -> (%8487:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=412)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=406), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=409), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=408))] (%8486:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=406)]) 
-> (%8488:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=409)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=406), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=411), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=410))] (%8486:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=406)]) -> (%8489:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=411)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=412), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=412), )] (%8487:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=412)]) -> (%8487:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=412)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=412), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=412), )] (%8487:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=412)]) -> (%8490:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=412)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=409), 
outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=409), )] (%8488:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=409)]) -> (%8488:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=409)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=409), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=409), )] (%8488:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=409)]) -> (%8491:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=409)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=411), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=411), )] (%8489:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=411)]) -> (%8489:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=411)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=411), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=411), )] (%8489:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=411)]) -> (%8492:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=411)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=412), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=413), weight_weight:QuantSpec(Raw(type: Int16PerTensor), uuid=414))] (%8490:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=412)]) -> (%8493:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=413)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=409), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=415), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=416))] (%8491:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=409)]) -> (%8494:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=415)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=413), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=413), )] (%8493:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=413)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%8495:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=413)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=415), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=415), )] (%8494:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=415)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%8496:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=415)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=415), outputs_0:QuantSpec(Raw(type: Float16), uuid=417), )] (%8496:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=415)]) -> (%8497:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=417)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=417), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=418), )] (%8497:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=417)]) -> (%8498:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=418)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=418), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=418), )] (%8498:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=418)]) -> (%8499:tensor<[1, 8, 128, 32], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=418)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=411), outputs_0:QuantSpec(Raw(type: Float16), uuid=419), )] (%8492:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=411)]) -> (%8500:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=419)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=419), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=420), )] (%8500:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=419)]) -> (%8501:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=420)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=13), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=418), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=13), )] (%8035:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=13)], %8499:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=418)]) -> (%8502:tensor<[1, 8, 128, 1024], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=13)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=41), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=420), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=41), )] (%8036:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=41)], %8501:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=420)]) -> (%8503:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=41)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=13), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=13), )] (%8502:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=13)]) -> (%8504:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=13)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=41), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), 
uuid=41), )] (%8503:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=41)]) -> (%8505:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=41)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=413), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=13), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=421), )] (%8495:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=413)], %8504:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=13)]) -> (%8506:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=421)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=421), inputs_1:QuantSpec(Raw(type: Float32), uuid=422), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=421), )] (%8506:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=421)], %8507:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=422), constant:[0.088388346]]) -> 
(%8508:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=421)]) - linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=421), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=423), )] (%8508:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=421)]) -> (%8509:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=423)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=423), inputs_1:QuantSpec(Raw(type: Int16), uuid=424), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=423), )] (%8509:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=423)], %8510:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=424), constant:[-20]]) -> (%8511:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=423)]) - linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=425), outputs_0:QuantSpec(Raw(type: UInt8), uuid=426), )] (%8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8512:tensor<[1], UInt16, 
CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=425), constant:[-0.93359375]]) -> (%8513:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=426)]) - linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=426), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=421), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=423), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=423), )] (%8513:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=426)], %8508:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=421)], %8511:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=423)]) -> (%8514:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=423)]) - linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=423), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=427), )] (%8514:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=423)]) -> (%8515:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=427)]) - linalg.CPU.MatMulOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=427), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=41), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=428), )] (%8515:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=427)], %8505:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=41)]) -> (%8516:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=428)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=428), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=428), )] (%8516:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=428)]) -> (%8517:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=428)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=428), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=428), )] (%8517:tensor<[1, 32, 16, 128], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=428)]) -> (%8517:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=428)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=428), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=430), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=429))] (%8517:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=428)]) -> (%8518:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=430)]) - cf.ReturnOp (%8518:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=430)], %8499:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=418)], %8501:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=420)]) -> () + (%8789:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=416)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 
0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8228:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=13)], %8229:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=41)]) -> (%8831:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=441)], %8811:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=429)], %8813:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=431)]) { + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=416, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=419, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=418, solved=0))] (%8789:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=416)]) -> 
(%8790:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=419)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=416, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=421, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=420, solved=0))] (%8789:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=416)]) -> (%8791:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=421)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=416, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=423, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=422, solved=0))] (%8789:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=416)]) -> (%8792:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 
0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=423)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=419, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=419, solved=0), )] (%8790:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=419)]) -> (%8790:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=419)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=419, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=419, solved=0), )] (%8790:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=419)]) -> (%8793:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=419)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=421, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), 
uuid=421, solved=0), )] (%8791:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=421)]) -> (%8791:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=421)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=421, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=421, solved=0), )] (%8791:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=421)]) -> (%8794:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=421)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=423, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=423, solved=0), )] (%8792:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=423)]) -> (%8792:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=423)]) + 
linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=423, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=423, solved=0), )] (%8792:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=423)]) -> (%8795:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=423)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=419, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=424, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=425, solved=0))] (%8793:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=419)]) -> (%8796:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=424)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=421, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, 
quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=426, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=427, solved=0))] (%8794:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=421)]) -> (%8797:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=426)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), )] (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), )] (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) + 
linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=424, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=424, solved=0), )] (%8796:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=424)]) -> (%8796:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=424)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=424, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=424, solved=0), )] (%8796:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=424)]) -> (%8796:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=424)]) + linalg.CPU.NegOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=424, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=424, solved=0), )] (%8796:tensor<[1, 16, 32, 128], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=424)]) -> (%8798:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=424)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=424, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=424, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=424, solved=0), )] (%8798:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=424)], %8796:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=424)]) -> (%8799:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=424)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=424, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=424, solved=0), )] 
(%8799:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=424)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%8800:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=424)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=424, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=424, solved=0), )] (%8796:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=424)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8801:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=424)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=424, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: 
Int32), uuid=424, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=424, solved=0), )] (%8801:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=424)], %8800:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=424)]) -> (%8802:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=424)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=426, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=426, solved=0), )] (%8797:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=426)]) -> (%8797:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=426)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=426, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=426, solved=0), )] (%8797:tensor<[1, 8, 32, 128], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=426)]) -> (%8797:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=426)]) + linalg.CPU.NegOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=426, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=426, solved=0), )] (%8797:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=426)]) -> (%8803:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=426)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=426, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=426, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=426, solved=0), )] (%8803:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=426)], %8797:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, 
scale_type: Float32, zero_point_type: Int32), uuid=426)]) -> (%8804:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=426)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=426, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=426, solved=0), )] (%8804:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=426)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%8805:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=426)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=426, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=426, solved=0), )] (%8797:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=426)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8806:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=426)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=426, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=426, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=426, solved=0), )] (%8806:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=426)], %8805:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=426)]) -> (%8807:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=426)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=426, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=428, solved=0), )] (%8807:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=426)]) -> (%8808:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=428)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=428, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=429, solved=0), )] (%8808:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=428)]) -> (%8809:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=429)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=429, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=429, solved=0), )] (%8809:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=429)]) -> (%8811:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=429)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=423, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=430, solved=0), )] (%8795:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=423)]) -> (%8812:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=430)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), 
uuid=430, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=431, solved=0), )] (%8812:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=430)]) -> (%8813:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=431)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=13, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=429, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=13, solved=0), )] (%8228:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=13)], %8811:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=429)]) -> (%8815:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=13)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=41, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=431, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=41, solved=0), )] (%8229:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), 
uuid=41)], %8813:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=431)]) -> (%8816:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=41)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=13, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=13, solved=0), )] (%8815:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=13)]) -> (%8817:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=13)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=41, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=41, solved=0), )] (%8816:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=41)]) -> (%8818:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=41)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=424, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: 
Float32), uuid=13, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=432, solved=0), )] (%8802:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=424)], %8817:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=13)]) -> (%8819:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=432)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=432, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=433, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=432, solved=0), )] (%8819:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=432)], %8820:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=433), constant:[0.088388346]]) -> (%8821:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=432)]) + linalg.CPU.ReduceMinOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=432, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=434, solved=0), )] (%8821:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=432)]) -> (%8822:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=434)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=434, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=435, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=434, solved=0), )] (%8822:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=434)], %8823:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=435), constant:[-20]]) -> (%8824:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=434)]) + linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2, solved=0), 
inputs_1:QuantSpec(Raw(type: UInt16), uuid=436, solved=0), outputs_0:QuantSpec(Raw(type: UInt8), uuid=437, solved=0), )] (%8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8825:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=436), constant:[0]]) -> (%8826:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=437)]) + linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=437, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=432, solved=0), inputs_2:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=434, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=434, solved=0), )] (%8826:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=437)], %8821:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=432)], %8824:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=434)]) -> (%8827:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=434)]) + linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=434, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: 
UInt16, scale_type: Float32, zero_point_type: Int32), uuid=438, solved=0), )] (%8827:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=434)]) -> (%8828:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=438)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=438, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=41, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=439, solved=0), )] (%8828:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=438)], %8818:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=41)]) -> (%8829:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=439)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=439, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=439, solved=0), )] (%8829:tensor<[1, 16, 32, 128], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=439)]) -> (%8830:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=439)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=439, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=439, solved=0), )] (%8830:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=439)]) -> (%8830:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=439)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=439, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=441, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=440, solved=0))] (%8830:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=439)]) -> (%8831:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 
65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=441)]) + cf.ReturnOp (%8831:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=441)], %8811:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=429)], %8813:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=431)]) -> () } } graph.SubGraphOp @model.layers.10.mlp [using_qnn:true, symbol:model.layers.10.mlp] { - (%8520:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=431)]) -> (%8525:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=439)]) { - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=431), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=434), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=433))] (%8520:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=431)]) -> (%8521:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=434)]) - linalg.CPU.SiLUOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=434), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=435), )] (%8521:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=434)]) -> (%8522:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=435)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=431), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=437), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=436))] (%8520:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=431)]) -> (%8523:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=437)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=435), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=437), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=435), )] (%8522:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 
-32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=435)], %8523:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=437)]) -> (%8524:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=435)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=435), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=439), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=438))] (%8524:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=435)]) -> (%8525:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=439)]) - cf.ReturnOp (%8525:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=439)]) -> () + (%8833:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=442)]) -> (%8839:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=450)]) { + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 
65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=442, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=445, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=444, solved=0))] (%8833:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=442)]) -> (%8834:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=445)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=442, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=447, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=446, solved=0))] (%8833:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=442)]) -> (%8835:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=447)]) + linalg.CPU.SigmoidOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=447, solved=0), 
outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=448, solved=0), )] (%8835:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=447)]) -> (%8836:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=448)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=447, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=448, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=447, solved=0), )] (%8835:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=447)], %8836:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=448)]) -> (%8837:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=447)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=447, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=445, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=447, solved=0), )] (%8837:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=447)], %8834:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=445)]) -> (%8838:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=447)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=447, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=450, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=449, solved=0))] (%8838:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=447)]) -> (%8839:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=450)]) + cf.ReturnOp (%8839:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=450)]) -> () } } 
graph.SubGraphOp @model.layers.11 [using_qnn:true, symbol:model.layers.11] { - (%8526:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=439)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8037:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=14)], %8038:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=42)]) -> (%8567:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=473)], %8540:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=452)], %8542:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=454)]) { - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=439), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=440), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: 
Float32), uuid=441))] (%8526:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=439)]) -> (%8527:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=440)]) - graph.CallGraphOp @model.layers.11.self_attn (%8527:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=440)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8037:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=14)], %8038:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=42)]) -> (%8559:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=464)], %8540:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=452)], %8542:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=454)]) - linalg.CPU.AddOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=464), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=439), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=464), )] (%8559:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=464)], %8526:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=439)]) -> (%8560:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=464)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=464), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=465), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=466))] (%8560:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=464)]) -> (%8561:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=465)]) - graph.CallGraphOp @model.layers.11.mlp (%8561:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=465)]) -> (%8566:tensor<[1, 32, 2048], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=473)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=473), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=464), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=473), )] (%8566:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=473)], %8560:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=464)]) -> (%8567:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=473)]) - cf.ReturnOp (%8567:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=473)], %8540:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=452)], %8542:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=454)]) -> () + (%8840:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: 
UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8230:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=14)], %8231:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=42)]) -> (%8892:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8863:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=464)], %8865:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=466)]) { + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=451, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=452, solved=0))] (%8840:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%8841:tensor<[1, 32, 2048], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=451)]) + graph.CallGraphOp @model.layers.11.self_attn (%8841:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=451)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8230:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=14)], %8231:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=42)]) -> (%8883:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=476)], %8863:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=464)], %8865:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=466)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, 
quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=476, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%8840:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8883:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=476)]) -> (%8884:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=477, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=478, solved=0))] (%8884:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%8885:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=477)]) + graph.CallGraphOp @model.layers.11.mlp (%8885:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: 
Float32, zero_point_type: Int32), uuid=477)]) -> (%8891:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=485)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=485, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%8884:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8891:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=485)]) -> (%8892:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + cf.ReturnOp (%8892:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8863:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=464)], %8865:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=466)]) -> () } } graph.SubGraphOp @model.layers.11.self_attn [using_qnn:true, 
symbol:model.layers.11.self_attn] { - (%8527:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=440)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8037:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=14)], %8038:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=42)]) -> (%8559:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=464)], %8540:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=452)], %8542:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=454)]) { - linalg.CPU.LinearOp (%8527:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=440)]) -> (%8528:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=446)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: 
Int16, scale_type: Float32), uuid=440), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=443), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=442))] (%8527:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=440)]) -> (%8529:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=443)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=440), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=445), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=444))] (%8527:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=440)]) -> (%8530:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=445)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=446), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=446), )] (%8528:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=446)]) -> (%8528:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=446)]) - linalg.CPU.TransposeOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=446), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=446), )] (%8528:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=446)]) -> (%8531:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=446)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=443), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=443), )] (%8529:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=443)]) -> (%8529:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=443)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=443), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=443), )] (%8529:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=443)]) -> (%8532:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=443)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=445), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: 
Float32), uuid=445), )] (%8530:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=445)]) -> (%8530:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=445)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=445), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=445), )] (%8530:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=445)]) -> (%8533:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=445)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=446), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=447), weight_weight:QuantSpec(Raw(type: Int16PerTensor), uuid=448))] (%8531:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=446)]) -> (%8534:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=447)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=443), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=449), 
weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=450))] (%8532:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=443)]) -> (%8535:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=449)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=447), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=447), )] (%8534:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=447)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%8536:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=447)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=449), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 
32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=449), )] (%8535:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=449)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%8537:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=449)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=449), outputs_0:QuantSpec(Raw(type: Float16), uuid=451), )] (%8537:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=449)]) -> (%8538:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=451)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=451), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=452), )] (%8538:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=451)]) -> (%8539:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, 
quant_to_type: Int8, scale_type: Float32), uuid=452)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=452), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=452), )] (%8539:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=452)]) -> (%8540:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=452)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=445), outputs_0:QuantSpec(Raw(type: Float16), uuid=453), )] (%8533:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=445)]) -> (%8541:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=453)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=453), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=454), )] (%8541:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=453)]) -> (%8542:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=454)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=14), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: 
Float32), uuid=452), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=14), )] (%8037:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=14)], %8540:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=452)]) -> (%8543:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=14)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=42), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=454), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=42), )] (%8038:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=42)], %8542:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=454)]) -> (%8544:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=42)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=14), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=14), )] (%8543:tensor<[1, 8, 128, 1024], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=14)]) -> (%8545:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=14)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=42), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=42), )] (%8544:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=42)]) -> (%8546:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=42)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=447), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=14), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=455), )] (%8536:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=447)], %8545:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=14)]) -> (%8547:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=455)]) - linalg.CPU.MulOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=455), inputs_1:QuantSpec(Raw(type: Float32), uuid=456), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=455), )] (%8547:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=455)], %8548:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=456), constant:[0.088388346]]) -> (%8549:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=455)]) - linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=455), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=457), )] (%8549:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=455)]) -> (%8550:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=457)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=457), inputs_1:QuantSpec(Raw(type: Int16), uuid=458), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=457), )] (%8550:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: 
Float32), uuid=457)], %8551:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=458), constant:[-20]]) -> (%8552:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=457)]) - linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=459), outputs_0:QuantSpec(Raw(type: UInt8), uuid=460), )] (%8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8553:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=459), constant:[0.515625]]) -> (%8554:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=460)]) - linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=460), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=455), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=457), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=457), )] (%8554:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=460)], %8549:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=455)], %8552:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=457)]) -> (%8555:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=457)]) - linalg.CPU.SoftmaxOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=457), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=461), )] (%8555:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=457)]) -> (%8556:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=461)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=461), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=42), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=462), )] (%8556:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=461)], %8546:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=42)]) -> (%8557:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=462)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=462), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=462), )] (%8557:tensor<[1, 16, 32, 128], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=462)]) -> (%8558:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=462)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=462), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=462), )] (%8558:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=462)]) -> (%8558:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=462)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=462), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=464), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=463))] (%8558:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=462)]) -> (%8559:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=464)]) - cf.ReturnOp (%8559:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=464)], %8540:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=452)], %8542:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=454)]) -> () + (%8841:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=451)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8230:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=14)], %8231:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=42)]) -> (%8883:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=476)], %8863:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=464)], %8865:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=466)]) { + linalg.CPU.LinearOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=451, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=454, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=453, solved=0))] (%8841:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=451)]) -> (%8842:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=454)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=451, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=456, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=455, solved=0))] (%8841:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=451)]) -> (%8843:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=456)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, 
quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=451, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=458, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=457, solved=0))] (%8841:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=451)]) -> (%8844:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=458)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=454, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=454, solved=0), )] (%8842:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=454)]) -> (%8842:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=454)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=454, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=454, solved=0), )] (%8842:tensor<[1, 32, 16, 
128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=454)]) -> (%8845:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=454)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=456, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=456, solved=0), )] (%8843:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=456)]) -> (%8843:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=456)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=456, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=456, solved=0), )] (%8843:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=456)]) -> (%8846:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=456)]) + linalg.CPU.ViewOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=458, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=458, solved=0), )] (%8844:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=458)]) -> (%8844:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=458)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=458, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=458, solved=0), )] (%8844:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=458)]) -> (%8847:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=458)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=454, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=459, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: 
Float32, zero_point_type: Int32), uuid=460, solved=0))] (%8845:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=454)]) -> (%8848:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=459)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=456, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=461, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=462, solved=0))] (%8846:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=456)]) -> (%8849:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=461)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), )] (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, 
quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), )] (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=459, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=459, solved=0), )] (%8848:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=459)]) -> (%8848:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=459)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=459, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=459, solved=0), )] (%8848:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 
65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=459)]) -> (%8848:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=459)]) + linalg.CPU.NegOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=459, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=459, solved=0), )] (%8848:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=459)]) -> (%8850:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=459)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=459, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=459, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=459, solved=0), )] (%8850:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=459)], %8848:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=459)]) -> 
(%8851:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=459)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=459, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=459, solved=0), )] (%8851:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=459)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%8852:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=459)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=459, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=459, solved=0), )] (%8848:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=459)], %8268:tensor<[1, 1, 
32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8853:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=459)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=459, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=459, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=459, solved=0), )] (%8853:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=459)], %8852:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=459)]) -> (%8854:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=459)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=461, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=461, solved=0), )] (%8849:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: 
UInt16, scale_type: Float32, zero_point_type: Int32), uuid=461)]) -> (%8849:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=461)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=461, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=461, solved=0), )] (%8849:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=461)]) -> (%8849:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=461)]) + linalg.CPU.NegOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=461, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=461, solved=0), )] (%8849:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=461)]) -> (%8855:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=461)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: 
Int32), uuid=461, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=461, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=461, solved=0), )] (%8855:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=461)], %8849:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=461)]) -> (%8856:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=461)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=461, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=461, solved=0), )] (%8856:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=461)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%8857:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=461)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=461, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=461, solved=0), )] (%8849:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=461)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8858:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=461)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=461, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=461, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=461, solved=0), )] (%8858:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=461)], %8857:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=461)]) -> (%8859:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=461)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=461, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=463, solved=0), )] (%8859:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=461)]) -> (%8860:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=463)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=463, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=464, solved=0), )] (%8860:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=463)]) -> (%8861:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=464)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=464, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=464, solved=0), )] (%8861:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=464)]) -> (%8863:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), 
uuid=464)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=458, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=465, solved=0), )] (%8847:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=458)]) -> (%8864:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=465)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=465, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=466, solved=0), )] (%8864:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=465)]) -> (%8865:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=466)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=14, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=464, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=14, solved=0), )] (%8230:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=14)], %8863:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=464)]) -> (%8867:tensor<[1, 8, 128, 1024], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=14)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=42, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=466, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=42, solved=0), )] (%8231:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=42)], %8865:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=466)]) -> (%8868:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=42)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=14, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=14, solved=0), )] (%8867:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=14)]) -> (%8869:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=14)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=42, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, 
quant_to_type: UInt8, scale_type: Float32), uuid=42, solved=0), )] (%8868:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=42)]) -> (%8870:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=42)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=459, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=14, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=467, solved=0), )] (%8854:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=459)], %8869:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=14)]) -> (%8871:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=467)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=467, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=468, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), 
uuid=467, solved=0), )] (%8871:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=467)], %8872:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=468), constant:[0.088388346]]) -> (%8873:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=467)]) + linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=467, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=469, solved=0), )] (%8873:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=467)]) -> (%8874:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=469)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=469, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=470, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=469, solved=0), )] (%8874:tensor<[1, 16, 32, 1], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=469)], %8875:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=470), constant:[-20]]) -> (%8876:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=469)]) + linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2, solved=0), inputs_1:QuantSpec(Raw(type: UInt16), uuid=471, solved=0), outputs_0:QuantSpec(Raw(type: UInt8), uuid=472, solved=0), )] (%8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8877:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=471), constant:[0]]) -> (%8878:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=472)]) + linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=472, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=467, solved=0), inputs_2:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=469, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=469, solved=0), )] (%8878:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=472)], %8873:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=467)], %8876:tensor<[1, 16, 32, 1], 
UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=469)]) -> (%8879:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=469)]) + linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=469, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=473, solved=0), )] (%8879:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=469)]) -> (%8880:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=473)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=473, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=42, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=474, solved=0), )] (%8880:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=473)], %8870:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, 
scale_type: Float32), uuid=42)]) -> (%8881:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=474)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=474, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=474, solved=0), )] (%8881:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=474)]) -> (%8882:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=474)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=474, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=474, solved=0), )] (%8882:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=474)]) -> (%8882:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=474)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=474, solved=0), 
outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=476, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=475, solved=0))] (%8882:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=474)]) -> (%8883:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=476)]) + cf.ReturnOp (%8883:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=476)], %8863:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=464)], %8865:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=466)]) -> () } } graph.SubGraphOp @model.layers.11.mlp [using_qnn:true, symbol:model.layers.11.mlp] { - (%8561:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=465)]) -> (%8566:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=473)]) { - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=465), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=468), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=467))] (%8561:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=465)]) -> (%8562:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=468)]) - linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=468), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=469), )] (%8562:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=468)]) -> (%8563:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=469)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=465), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=471), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=470))] (%8561:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=465)]) -> (%8564:tensor<[1, 32, 6144], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=471)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=469), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=471), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=469), )] (%8563:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=469)], %8564:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=471)]) -> (%8565:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=469)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=469), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=473), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=472))] (%8565:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=469)]) -> (%8566:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=473)]) - cf.ReturnOp (%8566:tensor<[1, 32, 2048], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=473)]) -> () + (%8885:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=477)]) -> (%8891:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=485)]) { + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=477, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=480, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=479, solved=0))] (%8885:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=477)]) -> (%8886:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=480)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=477, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=482, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, 
scale_1_type: Float32), uuid=481, solved=0))] (%8885:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=477)]) -> (%8887:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=482)]) + linalg.CPU.SigmoidOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=482, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=483, solved=0), )] (%8887:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=482)]) -> (%8888:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=483)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=482, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=483, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=482, solved=0), )] (%8887:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=482)], %8888:tensor<[1, 32, 6144], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=483)]) -> (%8889:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=482)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=482, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=480, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=482, solved=0), )] (%8889:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=482)], %8886:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=480)]) -> (%8890:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=482)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=482, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=485, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: 
Float32), uuid=484, solved=0))] (%8890:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=482)]) -> (%8891:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=485)]) + cf.ReturnOp (%8891:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=485)]) -> () } } graph.SubGraphOp @model.layers.12 [using_qnn:true, symbol:model.layers.12] { - (%8567:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=473)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8039:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=15)], %8040:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=43)]) -> (%8608:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=507)], %8581:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 
-128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=486)], %8583:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=488)]) { - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=473), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=474), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=475))] (%8567:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=473)]) -> (%8568:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=474)]) - graph.CallGraphOp @model.layers.12.self_attn (%8568:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=474)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8039:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=15)], %8040:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, 
quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=43)]) -> (%8600:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=498)], %8581:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=486)], %8583:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=488)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=498), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=473), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=498), )] (%8600:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=498)], %8567:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=473)]) -> (%8601:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=498)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=498), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=499), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=500))] 
(%8601:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=498)]) -> (%8602:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=499)]) - graph.CallGraphOp @model.layers.12.mlp (%8602:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=499)]) -> (%8607:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=507)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=507), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=498), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=507), )] (%8607:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=507)], %8601:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=498)]) -> (%8608:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=507)]) - cf.ReturnOp (%8608:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=507)], %8581:tensor<[1, 8, 128, 32], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=486)], %8583:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=488)]) -> () + (%8892:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8232:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=15)], %8233:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=43)]) -> (%8944:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8915:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=499)], %8917:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=501)]) { + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, 
scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=486, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=487, solved=0))] (%8892:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%8893:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=486)]) + graph.CallGraphOp @model.layers.12.self_attn (%8893:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=486)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8232:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=15)], %8233:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=43)]) -> (%8935:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=511)], %8915:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=499)], %8917:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=501)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=511, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%8892:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8935:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=511)]) -> (%8936:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=512, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: 
Int32), uuid=513, solved=0))] (%8936:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%8937:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=512)]) + graph.CallGraphOp @model.layers.12.mlp (%8937:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=512)]) -> (%8943:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=520)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=520, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%8936:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8943:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=520)]) -> (%8944:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) 
+ cf.ReturnOp (%8944:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8915:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=499)], %8917:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=501)]) -> () } } graph.SubGraphOp @model.layers.12.self_attn [using_qnn:true, symbol:model.layers.12.self_attn] { - (%8568:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=474)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8039:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=15)], %8040:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=43)]) -> (%8600:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=498)], %8581:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=486)], 
%8583:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=488)]) { - linalg.CPU.LinearOp (%8568:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=474)]) -> (%8569:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=480)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=474), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=477), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=476))] (%8568:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=474)]) -> (%8570:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=477)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=474), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=479), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=478))] (%8568:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=474)]) -> (%8571:tensor<[1, 
32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=479)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=480), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=480), )] (%8569:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=480)]) -> (%8569:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=480)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=480), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=480), )] (%8569:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=480)]) -> (%8572:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=480)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=477), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=477), )] (%8570:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=477)]) -> (%8570:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=477)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=477), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=477), )] (%8570:tensor<[1, 32, 8, 128], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=477)]) -> (%8573:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=477)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=479), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=479), )] (%8571:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=479)]) -> (%8571:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=479)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=479), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=479), )] (%8571:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=479)]) -> (%8574:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=479)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=480), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=481), weight_weight:QuantSpec(Raw(type: Int16PerTensor), uuid=482))] (%8572:tensor<[1, 16, 32, 128], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=480)]) -> (%8575:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=481)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=477), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=483), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=484))] (%8573:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=477)]) -> (%8576:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=483)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=481), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=481), )] (%8575:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=481)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%8577:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=481)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=483), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=483), )] (%8576:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=483)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%8578:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=483)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=483), outputs_0:QuantSpec(Raw(type: Float16), uuid=485), )] (%8578:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=483)]) -> (%8579:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=485)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=485), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=486), )] (%8579:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=485)]) -> (%8580:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=486)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=486), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=486), )] (%8580:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=486)]) -> (%8581:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=486)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=479), outputs_0:QuantSpec(Raw(type: Float16), uuid=487), )] (%8574:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=479)]) -> (%8582:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=487)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=487), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), 
uuid=488), )] (%8582:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=487)]) -> (%8583:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=488)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=15), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=486), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=15), )] (%8039:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=15)], %8581:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=486)]) -> (%8584:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=15)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=43), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=488), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=43), )] (%8040:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=43)], %8583:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=488)]) -> 
(%8585:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=43)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=15), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=15), )] (%8584:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=15)]) -> (%8586:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=15)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=43), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=43), )] (%8585:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=43)]) -> (%8587:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=43)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=481), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=15), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=489), )] (%8577:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 
-32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=481)], %8586:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=15)]) -> (%8588:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=489)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=489), inputs_1:QuantSpec(Raw(type: Float32), uuid=490), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=489), )] (%8588:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=489)], %8589:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=490), constant:[0.088388346]]) -> (%8590:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=489)]) - linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=489), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=491), )] (%8590:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=489)]) -> (%8591:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=491)]) - linalg.CPU.AddOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=491), inputs_1:QuantSpec(Raw(type: Int16), uuid=492), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=491), )] (%8591:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=491)], %8592:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=492), constant:[-20]]) -> (%8593:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=491)]) - linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=493), outputs_0:QuantSpec(Raw(type: UInt8), uuid=494), )] (%8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8594:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=493), constant:[0.74609375]]) -> (%8595:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=494)]) - linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=494), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=489), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=491), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=491), )] (%8595:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=494)], %8590:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=489)], %8593:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=491)]) -> (%8596:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=491)]) - linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=491), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=495), )] (%8596:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=491)]) -> (%8597:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=495)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=495), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=43), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=496), )] (%8597:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=495)], %8587:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=43)]) -> (%8598:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 
32767, quant_to_type: Int16, scale_type: Float32), uuid=496)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=496), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=496), )] (%8598:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=496)]) -> (%8599:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=496)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=496), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=496), )] (%8599:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=496)]) -> (%8599:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=496)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=496), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=498), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=497))] (%8599:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=496)]) -> (%8600:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=498)]) - cf.ReturnOp (%8600:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=498)], %8581:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=486)], %8583:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=488)]) -> () + (%8893:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=486)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8232:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=15)], %8233:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=43)]) -> (%8935:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=511)], 
%8915:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=499)], %8917:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=501)]) { + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=486, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=489, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=488, solved=0))] (%8893:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=486)]) -> (%8894:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=489)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=486, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=491, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=490, solved=0))] (%8893:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: 
Float32, zero_point_type: Int32), uuid=486)]) -> (%8895:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=491)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=486, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=493, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=492, solved=0))] (%8893:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=486)]) -> (%8896:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=493)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=489, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=489, solved=0), )] (%8894:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=489)]) -> (%8894:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=489)]) + linalg.CPU.TransposeOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=489, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=489, solved=0), )] (%8894:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=489)]) -> (%8897:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=489)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=491, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=491, solved=0), )] (%8895:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=491)]) -> (%8895:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=491)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=491, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=491, solved=0), )] (%8895:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, 
quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=491)]) -> (%8898:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=491)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=493, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=493, solved=0), )] (%8896:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=493)]) -> (%8896:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=493)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=493, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=493, solved=0), )] (%8896:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=493)]) -> (%8899:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=493)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, 
scale_type: Float32, zero_point_type: Int32), uuid=489, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=494, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=495, solved=0))] (%8897:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=489)]) -> (%8900:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=494)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=491, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=496, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=497, solved=0))] (%8898:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=491)]) -> (%8901:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=496)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, 
scale_type: Float32), uuid=65, solved=0), )] (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), )] (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=494, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=494, solved=0), )] (%8900:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=494)]) -> (%8900:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=494)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: 
Float32, zero_point_type: Int32), uuid=494, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=494, solved=0), )] (%8900:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=494)]) -> (%8900:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=494)]) + linalg.CPU.NegOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=494, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=494, solved=0), )] (%8900:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=494)]) -> (%8902:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=494)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=494, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=494, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=494, solved=0), )] (%8902:tensor<[1, 16, 32, 64], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=494)], %8900:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=494)]) -> (%8903:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=494)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=494, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=494, solved=0), )] (%8903:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=494)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%8904:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=494)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=494, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), 
outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=494, solved=0), )] (%8900:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=494)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8905:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=494)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=494, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=494, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=494, solved=0), )] (%8905:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=494)], %8904:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=494)]) -> (%8906:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=494)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, 
quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=496, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=496, solved=0), )] (%8901:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=496)]) -> (%8901:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=496)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=496, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=496, solved=0), )] (%8901:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=496)]) -> (%8901:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=496)]) + linalg.CPU.NegOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=496, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=496, solved=0), )] (%8901:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=496)]) 
-> (%8907:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=496)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=496, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=496, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=496, solved=0), )] (%8907:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=496)], %8901:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=496)]) -> (%8908:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=496)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=496, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=496, solved=0), )] (%8908:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=496)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%8909:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=496)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=496, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=496, solved=0), )] (%8901:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=496)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8910:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=496)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=496, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=496, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=496, 
solved=0), )] (%8910:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=496)], %8909:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=496)]) -> (%8911:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=496)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=496, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=498, solved=0), )] (%8911:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=496)]) -> (%8912:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=498)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=498, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=499, solved=0), )] (%8912:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=498)]) -> (%8913:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=499)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=499, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, 
scale_type: Float32), uuid=499, solved=0), )] (%8913:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=499)]) -> (%8915:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=499)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=493, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=500, solved=0), )] (%8899:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=493)]) -> (%8916:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=500)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=500, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=501, solved=0), )] (%8916:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=500)]) -> (%8917:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=501)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=15, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=499, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=15, solved=0), )] (%8232:tensor<[1, 8, 128, 992], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=15)], %8915:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=499)]) -> (%8919:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=15)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=43, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=501, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=43, solved=0), )] (%8233:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=43)], %8917:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=501)]) -> (%8920:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=43)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=15, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=15, solved=0), )] (%8919:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=15)]) -> (%8921:tensor<[1, 16, 128, 1024], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=15)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=43, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=43, solved=0), )] (%8920:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=43)]) -> (%8922:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=43)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=494, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=15, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=502, solved=0), )] (%8906:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=494)], %8921:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=15)]) -> (%8923:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=502)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, 
quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=502, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=503, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=502, solved=0), )] (%8923:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=502)], %8924:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=503), constant:[0.088388346]]) -> (%8925:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=502)]) + linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=502, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=504, solved=0), )] (%8925:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=502)]) -> (%8926:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=504)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=504, 
solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=505, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=504, solved=0), )] (%8926:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=504)], %8927:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=505), constant:[-20]]) -> (%8928:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=504)]) + linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2, solved=0), inputs_1:QuantSpec(Raw(type: UInt16), uuid=506, solved=0), outputs_0:QuantSpec(Raw(type: UInt8), uuid=507, solved=0), )] (%8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8929:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=506), constant:[0]]) -> (%8930:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=507)]) + linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=507, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=502, solved=0), inputs_2:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=504, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=504, solved=0), )] (%8930:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=507)], %8925:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=502)], %8928:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=504)]) -> (%8931:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=504)]) + linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=504, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=508, solved=0), )] (%8931:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=504)]) -> (%8932:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=508)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=508, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=43, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: 
Int32), uuid=509, solved=0), )] (%8932:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=508)], %8922:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=43)]) -> (%8933:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=509)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=509, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=509, solved=0), )] (%8933:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=509)]) -> (%8934:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=509)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=509, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=509, solved=0), )] (%8934:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=509)]) -> (%8934:tensor<[1, 32, 2048], 
UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=509)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=509, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=511, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=510, solved=0))] (%8934:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=509)]) -> (%8935:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=511)]) + cf.ReturnOp (%8935:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=511)], %8915:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=499)], %8917:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=501)]) -> () } } graph.SubGraphOp @model.layers.12.mlp [using_qnn:true, symbol:model.layers.12.mlp] { - (%8602:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=499)]) -> (%8607:tensor<[1, 32, 2048], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=507)]) { - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=499), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=502), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=501))] (%8602:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=499)]) -> (%8603:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=502)]) - linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=502), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=503), )] (%8603:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=502)]) -> (%8604:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=503)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=499), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=505), weight_weight:QuantSpec(LPBQ(quant_min: -8, 
quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=504))] (%8602:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=499)]) -> (%8605:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=505)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=503), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=505), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=503), )] (%8604:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=503)], %8605:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=505)]) -> (%8606:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=503)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=503), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=507), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=506))] (%8606:tensor<[1, 32, 6144], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=503)]) -> (%8607:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=507)]) - cf.ReturnOp (%8607:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=507)]) -> () + (%8937:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=512)]) -> (%8943:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=520)]) { + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=512, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=515, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=514, solved=0))] (%8937:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=512)]) -> (%8938:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=515)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, 
quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=512, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=517, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=516, solved=0))] (%8937:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=512)]) -> (%8939:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=517)]) + linalg.CPU.SigmoidOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=517, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=518, solved=0), )] (%8939:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=517)]) -> (%8940:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=518)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=517, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=518, solved=0), 
outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=517, solved=0), )] (%8939:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=517)], %8940:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=518)]) -> (%8941:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=517)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=517, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=515, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=517, solved=0), )] (%8941:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=517)], %8938:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=515)]) -> (%8942:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=517)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 
65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=517, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=520, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=519, solved=0))] (%8942:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=517)]) -> (%8943:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=520)]) + cf.ReturnOp (%8943:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=520)]) -> () } } graph.SubGraphOp @model.layers.13 [using_qnn:true, symbol:model.layers.13] { - (%8608:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=507)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8041:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=16)], %8042:tensor<[1, 8, 992, 128], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=44)]) -> (%8649:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=541)], %8622:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=520)], %8624:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=522)]) { - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=507), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=508), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=509))] (%8608:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=507)]) -> (%8609:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=508)]) - graph.CallGraphOp @model.layers.13.self_attn (%8609:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=508)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, 
scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8041:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=16)], %8042:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=44)]) -> (%8641:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=532)], %8622:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=520)], %8624:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=522)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=532), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=507), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=532), )] (%8641:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=532)], %8608:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=507)]) -> (%8642:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=532)]) - linalg.CPU.RMSNormOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=532), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=533), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=534))] (%8642:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=532)]) -> (%8643:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=533)]) - graph.CallGraphOp @model.layers.13.mlp (%8643:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=533)]) -> (%8648:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=541)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=541), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=532), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=541), )] (%8648:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=541)], %8642:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=532)]) -> (%8649:tensor<[1, 32, 2048], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=541)]) - cf.ReturnOp (%8649:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=541)], %8622:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=520)], %8624:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=522)]) -> () + (%8944:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8234:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=16)], %8235:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=44)]) -> (%8996:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8967:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, 
quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=534)], %8969:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=536)]) { + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=521, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=522, solved=0))] (%8944:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%8945:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=521)]) + graph.CallGraphOp @model.layers.13.self_attn (%8945:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=521)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8234:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, 
quant_to_type: Int8, scale_type: Float32), uuid=16)], %8235:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=44)]) -> (%8987:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=546)], %8967:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=534)], %8969:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=536)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=546, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%8944:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8987:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=546)]) -> (%8988:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, 
quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=547, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=548, solved=0))] (%8988:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%8989:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=547)]) + graph.CallGraphOp @model.layers.13.mlp (%8989:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=547)]) -> (%8995:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=555)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=555, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%8988:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], 
%8995:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=555)]) -> (%8996:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + cf.ReturnOp (%8996:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8967:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=534)], %8969:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=536)]) -> () } } graph.SubGraphOp @model.layers.13.self_attn [using_qnn:true, symbol:model.layers.13.self_attn] { - (%8609:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=508)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8041:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=16)], %8042:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, 
quant_to_type: Int8, scale_type: Float32), uuid=44)]) -> (%8641:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=532)], %8622:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=520)], %8624:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=522)]) { - linalg.CPU.LinearOp (%8609:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=508)]) -> (%8610:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=514)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=508), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=511), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=510))] (%8609:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=508)]) -> (%8611:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=511)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=508), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: 
Int16, scale_type: Float32), uuid=513), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=512))] (%8609:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=508)]) -> (%8612:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=513)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=514), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=514), )] (%8610:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=514)]) -> (%8610:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=514)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=514), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=514), )] (%8610:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=514)]) -> (%8613:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=514)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=511), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=511), )] (%8611:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=511)]) -> (%8611:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=511)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=511), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=511), )] (%8611:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=511)]) -> (%8614:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=511)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=513), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=513), )] (%8612:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=513)]) -> (%8612:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=513)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=513), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=513), )] (%8612:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=513)]) -> (%8615:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=513)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=514), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=515), weight_weight:QuantSpec(Raw(type: Int16PerTensor), uuid=516))] (%8613:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=514)]) -> (%8616:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=515)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=511), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=517), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=518))] (%8614:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=511)]) -> (%8617:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=517)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=515), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: 
Float32), uuid=515), )] (%8616:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=515)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%8618:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=515)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=517), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=517), )] (%8617:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=517)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%8619:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=517)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=517), outputs_0:QuantSpec(Raw(type: Float16), uuid=519), )] (%8619:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=517)]) -> (%8620:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=519)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=519), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=520), )] (%8620:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=519)]) -> (%8621:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=520)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=520), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=520), )] (%8621:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=520)]) -> (%8622:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=520)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=513), outputs_0:QuantSpec(Raw(type: Float16), uuid=521), )] (%8615:tensor<[1, 8, 32, 128], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=513)]) -> (%8623:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=521)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=521), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=522), )] (%8623:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=521)]) -> (%8624:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=522)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=16), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=520), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=16), )] (%8041:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=16)], %8622:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=520)]) -> (%8625:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=16)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=44), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=522), 
outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=44), )] (%8042:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=44)], %8624:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=522)]) -> (%8626:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=44)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=16), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=16), )] (%8625:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=16)]) -> (%8627:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=16)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=44), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=44), )] (%8626:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=44)]) -> (%8628:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=44)]) - linalg.CPU.MatMulOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=515), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=16), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=523), )] (%8618:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=515)], %8627:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=16)]) -> (%8629:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=523)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=523), inputs_1:QuantSpec(Raw(type: Float32), uuid=524), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=523), )] (%8629:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=523)], %8630:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=524), constant:[0.088388346]]) -> (%8631:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=523)]) - linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=523), outputs_0:QuantSpec(SymPerTensor(quant_min: 
-32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=525), )] (%8631:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=523)]) -> (%8632:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=525)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=525), inputs_1:QuantSpec(Raw(type: Int16), uuid=526), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=525), )] (%8632:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=525)], %8633:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=526), constant:[-20]]) -> (%8634:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=525)]) - linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=527), outputs_0:QuantSpec(Raw(type: UInt8), uuid=528), )] (%8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8635:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=527), constant:[-0.78515625]]) -> (%8636:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=528)]) - linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=528), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=523), 
inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=525), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=525), )] (%8636:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=528)], %8631:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=523)], %8634:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=525)]) -> (%8637:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=525)]) - linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=525), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=529), )] (%8637:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=525)]) -> (%8638:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=529)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=529), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=44), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=530), )] 
(%8638:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=529)], %8628:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=44)]) -> (%8639:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=530)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=530), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=530), )] (%8639:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=530)]) -> (%8640:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=530)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=530), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=530), )] (%8640:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=530)]) -> (%8640:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=530)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 
32767, quant_to_type: Int16, scale_type: Float32), uuid=530), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=532), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=531))] (%8640:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=530)]) -> (%8641:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=532)]) - cf.ReturnOp (%8641:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=532)], %8622:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=520)], %8624:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=522)]) -> () + (%8945:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=521)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8234:tensor<[1, 8, 128, 992], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=16)], %8235:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=44)]) -> (%8987:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=546)], %8967:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=534)], %8969:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=536)]) { + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=521, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=524, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=523, solved=0))] (%8945:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=521)]) -> (%8946:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=524)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=521, 
solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=526, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=525, solved=0))] (%8945:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=521)]) -> (%8947:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=526)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=521, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=528, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=527, solved=0))] (%8945:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=521)]) -> (%8948:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=528)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=524, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, 
scale_type: Float32, zero_point_type: Int32), uuid=524, solved=0), )] (%8946:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=524)]) -> (%8946:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=524)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=524, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=524, solved=0), )] (%8946:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=524)]) -> (%8949:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=524)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=526, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=526, solved=0), )] (%8947:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=526)]) -> (%8947:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=526)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=526, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=526, solved=0), )] (%8947:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=526)]) -> (%8950:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=526)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=528, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=528, solved=0), )] (%8948:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=528)]) -> (%8948:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=528)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=528, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=528, solved=0), )] (%8948:tensor<[1, 32, 8, 128], 
UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=528)]) -> (%8951:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=528)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=524, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=529, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=530, solved=0))] (%8949:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=524)]) -> (%8952:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=529)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=526, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=531, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=532, solved=0))] (%8950:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: 
Float32, zero_point_type: Int32), uuid=526)]) -> (%8953:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=531)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), )] (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), )] (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=529, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=529, solved=0), )] (%8952:tensor<[1, 16, 32, 128], 
UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=529)]) -> (%8952:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=529)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=529, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=529, solved=0), )] (%8952:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=529)]) -> (%8952:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=529)]) + linalg.CPU.NegOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=529, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=529, solved=0), )] (%8952:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=529)]) -> (%8954:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=529)]) + linalg.CPU.ConcatOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=529, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=529, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=529, solved=0), )] (%8954:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=529)], %8952:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=529)]) -> (%8955:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=529)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=529, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=529, solved=0), )] (%8955:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=529)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> 
(%8956:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=529)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=529, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=529, solved=0), )] (%8952:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=529)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8957:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=529)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=529, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=529, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=529, solved=0), )] (%8957:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), 
uuid=529)], %8956:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=529)]) -> (%8958:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=529)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=531, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=531, solved=0), )] (%8953:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=531)]) -> (%8953:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=531)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=531, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=531, solved=0), )] (%8953:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=531)]) -> (%8953:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=531)]) + linalg.CPU.NegOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=531, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=531, solved=0), )] (%8953:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=531)]) -> (%8959:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=531)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=531, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=531, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=531, solved=0), )] (%8959:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=531)], %8953:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=531)]) -> (%8960:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=531)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, 
quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=531, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=531, solved=0), )] (%8960:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=531)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%8961:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=531)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=531, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=531, solved=0), )] (%8953:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=531)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8962:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: 
Float32, zero_point_type: Int32), uuid=531)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=531, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=531, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=531, solved=0), )] (%8962:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=531)], %8961:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=531)]) -> (%8963:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=531)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=531, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=533, solved=0), )] (%8963:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=531)]) -> (%8964:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=533)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=533, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=534, solved=0), )] 
(%8964:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=533)]) -> (%8965:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=534)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=534, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=534, solved=0), )] (%8965:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=534)]) -> (%8967:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=534)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=528, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=535, solved=0), )] (%8951:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=528)]) -> (%8968:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=535)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=535, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=536, solved=0), )] (%8968:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=535)]) -> (%8969:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, 
quant_to_type: UInt8, scale_type: Float32), uuid=536)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=16, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=534, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=16, solved=0), )] (%8234:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=16)], %8967:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=534)]) -> (%8971:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=16)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=44, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=536, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=44, solved=0), )] (%8235:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=44)], %8969:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=536)]) -> (%8972:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=44)]) + 
linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=16, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=16, solved=0), )] (%8971:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=16)]) -> (%8973:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=16)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=44, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=44, solved=0), )] (%8972:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=44)]) -> (%8974:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=44)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=529, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=16, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=537, solved=0), )] (%8958:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=529)], %8973:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=16)]) -> (%8975:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=537)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=537, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=538, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=537, solved=0), )] (%8975:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=537)], %8976:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=538), constant:[0.088388346]]) -> (%8977:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=537)]) + linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=537, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=539, solved=0), )] (%8977:tensor<[1, 16, 32, 1024], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=537)]) -> (%8978:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=539)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=539, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=540, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=539, solved=0), )] (%8978:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=539)], %8979:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=540), constant:[-20]]) -> (%8980:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=539)]) + linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2, solved=0), inputs_1:QuantSpec(Raw(type: UInt16), uuid=541, solved=0), outputs_0:QuantSpec(Raw(type: UInt8), uuid=542, solved=0), )] (%8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8981:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=541), constant:[0]]) -> (%8982:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), 
uuid=542)]) + linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=542, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=537, solved=0), inputs_2:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=539, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=539, solved=0), )] (%8982:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=542)], %8977:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=537)], %8980:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=539)]) -> (%8983:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=539)]) + linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=539, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=543, solved=0), )] (%8983:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=539)]) -> (%8984:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, 
quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=543)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=543, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=44, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=544, solved=0), )] (%8984:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=543)], %8974:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=44)]) -> (%8985:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=544)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=544, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=544, solved=0), )] (%8985:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=544)]) -> (%8986:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=544)]) + linalg.CPU.ViewOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=544, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=544, solved=0), )] (%8986:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=544)]) -> (%8986:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=544)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=544, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=546, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=545, solved=0))] (%8986:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=544)]) -> (%8987:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=546)]) + cf.ReturnOp (%8987:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=546)], %8967:tensor<[1, 8, 128, 32], UInt8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=534)], %8969:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=536)]) -> () } } graph.SubGraphOp @model.layers.13.mlp [using_qnn:true, symbol:model.layers.13.mlp] { - (%8643:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=533)]) -> (%8648:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=541)]) { - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=533), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=536), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=535))] (%8643:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=533)]) -> (%8644:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=536)]) - linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=536), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=537), )] (%8644:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 
-32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=536)]) -> (%8645:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=537)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=533), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=539), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=538))] (%8643:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=533)]) -> (%8646:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=539)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=537), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=539), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=537), )] (%8645:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=537)], %8646:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=539)]) -> (%8647:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 
32767, quant_to_type: Int16, scale_type: Float32), uuid=537)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=537), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=541), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=540))] (%8647:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=537)]) -> (%8648:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=541)]) - cf.ReturnOp (%8648:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=541)]) -> () + (%8989:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=547)]) -> (%8995:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=555)]) { + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=547, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=550, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: 
UInt4, scale_1_type: Float32), uuid=549, solved=0))] (%8989:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=547)]) -> (%8990:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=550)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=547, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=552, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=551, solved=0))] (%8989:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=547)]) -> (%8991:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=552)]) + linalg.CPU.SigmoidOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=552, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=553, solved=0), )] (%8991:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=552)]) -> (%8992:tensor<[1, 
32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=553)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=552, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=553, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=552, solved=0), )] (%8991:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=552)], %8992:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=553)]) -> (%8993:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=552)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=552, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=550, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=552, solved=0), )] (%8993:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=552)], %8990:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=550)]) -> (%8994:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=552)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=552, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=555, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=554, solved=0))] (%8994:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=552)]) -> (%8995:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=555)]) + cf.ReturnOp (%8995:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=555)]) -> () } } graph.SubGraphOp @model.layers.14 [using_qnn:true, symbol:model.layers.14] { - (%8649:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=541)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8043:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=17)], %8044:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=45)]) -> (%8690:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=575)], %8663:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=554)], %8665:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=556)]) { - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=541), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=542), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=543))] (%8649:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=541)]) -> (%8650:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=542)]) 
- graph.CallGraphOp @model.layers.14.self_attn (%8650:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=542)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8043:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=17)], %8044:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=45)]) -> (%8682:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=566)], %8663:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=554)], %8665:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=556)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=566), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=541), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=566), )] (%8682:tensor<[1, 32, 
2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=566)], %8649:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=541)]) -> (%8683:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=566)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=566), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=567), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=568))] (%8683:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=566)]) -> (%8684:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=567)]) - graph.CallGraphOp @model.layers.14.mlp (%8684:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=567)]) -> (%8689:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=575)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=575), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=566), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=575), )] (%8689:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=575)], %8683:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=566)]) -> (%8690:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=575)]) - cf.ReturnOp (%8690:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=575)], %8663:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=554)], %8665:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=556)]) -> () + (%8996:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8236:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, 
quant_to_type: Int8, scale_type: Float32), uuid=17)], %8237:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=45)]) -> (%9048:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %9019:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=569)], %9021:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=571)]) { + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=556, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=557, solved=0))] (%8996:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%8997:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=556)]) + graph.CallGraphOp @model.layers.14.self_attn (%8997:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=556)], %8267:tensor<[1, 1, 32, 128], 
UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8236:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=17)], %8237:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=45)]) -> (%9039:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=581)], %9019:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=569)], %9021:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=571)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=581, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%8996:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=60)], %9039:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=581)]) -> (%9040:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=582, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=583, solved=0))] (%9040:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%9041:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=582)]) + graph.CallGraphOp @model.layers.14.mlp (%9041:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=582)]) -> (%9047:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=590)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), 
uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=590, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%9040:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %9047:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=590)]) -> (%9048:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + cf.ReturnOp (%9048:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %9019:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=569)], %9021:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=571)]) -> () } } graph.SubGraphOp @model.layers.14.self_attn [using_qnn:true, symbol:model.layers.14.self_attn] { - (%8650:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=542)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8043:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=17)], %8044:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=45)]) -> (%8682:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=566)], %8663:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=554)], %8665:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=556)]) { - linalg.CPU.LinearOp (%8650:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=542)]) -> (%8651:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=548)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=542), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=545), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=544))] (%8650:tensor<[1, 32, 2048], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=542)]) -> (%8652:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=545)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=542), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=547), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=546))] (%8650:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=542)]) -> (%8653:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=547)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=548), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=548), )] (%8651:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=548)]) -> (%8651:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=548)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=548), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=548), )] (%8651:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=548)]) -> (%8654:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=548)]) - linalg.CPU.ViewOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=545), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=545), )] (%8652:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=545)]) -> (%8652:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=545)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=545), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=545), )] (%8652:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=545)]) -> (%8655:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=545)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=547), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=547), )] (%8653:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=547)]) -> (%8653:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=547)]) - 
linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=547), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=547), )] (%8653:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=547)]) -> (%8656:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=547)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=548), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=549), weight_weight:QuantSpec(Raw(type: Int16PerTensor), uuid=550))] (%8654:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=548)]) -> (%8657:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=549)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=545), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=551), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=552))] (%8655:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=545)]) -> (%8658:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=551)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=549), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=549), )] (%8657:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=549)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%8659:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=549)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=551), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=551), )] (%8658:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=551)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%8660:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=551)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=551), outputs_0:QuantSpec(Raw(type: Float16), uuid=553), )] (%8660:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=551)]) -> (%8661:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=553)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=553), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=554), )] (%8661:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=553)]) -> (%8662:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=554)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=554), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=554), )] (%8662:tensor<[1, 8, 32, 128], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=554)]) -> (%8663:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=554)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=547), outputs_0:QuantSpec(Raw(type: Float16), uuid=555), )] (%8656:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=547)]) -> (%8664:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=555)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=555), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=556), )] (%8664:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=555)]) -> (%8665:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=556)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=17), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=554), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=17), )] (%8043:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=17)], %8663:tensor<[1, 8, 128, 32], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=554)]) -> (%8666:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=17)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=45), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=556), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=45), )] (%8044:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=45)], %8665:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=556)]) -> (%8667:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=45)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=17), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=17), )] (%8666:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=17)]) -> (%8668:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=17)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, 
quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=45), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=45), )] (%8667:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=45)]) -> (%8669:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=45)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=549), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=17), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=557), )] (%8659:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=549)], %8668:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=17)]) -> (%8670:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=557)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=557), inputs_1:QuantSpec(Raw(type: Float32), uuid=558), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=557), )] (%8670:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=557)], %8671:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=558), constant:[0.088388346]]) -> (%8672:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=557)]) - linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=557), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=559), )] (%8672:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=557)]) -> (%8673:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=559)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=559), inputs_1:QuantSpec(Raw(type: Int16), uuid=560), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=559), )] (%8673:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=559)], %8674:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=560), constant:[-20]]) -> (%8675:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=559)]) - linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=561), 
outputs_0:QuantSpec(Raw(type: UInt8), uuid=562), )] (%8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8676:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=561), constant:[-0.46289062]]) -> (%8677:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=562)]) - linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=562), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=557), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=559), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=559), )] (%8677:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=562)], %8672:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=557)], %8675:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=559)]) -> (%8678:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=559)]) - linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=559), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=563), )] (%8678:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=559)]) -> (%8679:tensor<[1, 16, 32, 
1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=563)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=563), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=45), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=564), )] (%8679:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=563)], %8669:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=45)]) -> (%8680:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=564)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=564), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=564), )] (%8680:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=564)]) -> (%8681:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=564)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=564), 
outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=564), )] (%8681:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=564)]) -> (%8681:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=564)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=564), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=566), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=565))] (%8681:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=564)]) -> (%8682:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=566)]) - cf.ReturnOp (%8682:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=566)], %8663:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=554)], %8665:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=556)]) -> () + (%8997:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, 
quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=556)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8236:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=17)], %8237:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=45)]) -> (%9039:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=581)], %9019:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=569)], %9021:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=571)]) { + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=556, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=559, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=558, solved=0))] (%8997:tensor<[1, 32, 
2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=556)]) -> (%8998:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=559)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=556, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=561, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=560, solved=0))] (%8997:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=556)]) -> (%8999:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=561)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=556, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=563, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=562, solved=0))] (%8997:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, 
quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=556)]) -> (%9000:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=563)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=559, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=559, solved=0), )] (%8998:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=559)]) -> (%8998:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=559)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=559, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=559, solved=0), )] (%8998:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=559)]) -> (%9001:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=559)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=561, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=561, solved=0), )] (%8999:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=561)]) -> (%8999:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=561)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=561, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=561, solved=0), )] (%8999:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=561)]) -> (%9002:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=561)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=563, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=563, solved=0), )] (%9000:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=563)]) -> (%9000:tensor<[1, 32, 8, 128], 
UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=563)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=563, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=563, solved=0), )] (%9000:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=563)]) -> (%9003:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=563)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=559, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=564, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=565, solved=0))] (%9001:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=559)]) -> (%9004:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=564)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 
65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=561, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=566, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=567, solved=0))] (%9002:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=561)]) -> (%9005:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=566)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), )] (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), )] (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> 
(%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=564, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=564, solved=0), )] (%9004:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=564)]) -> (%9004:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=564)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=564, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=564, solved=0), )] (%9004:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=564)]) -> (%9004:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=564)]) + linalg.CPU.NegOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=564, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, 
quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=564, solved=0), )] (%9004:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=564)]) -> (%9006:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=564)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=564, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=564, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=564, solved=0), )] (%9006:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=564)], %9004:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=564)]) -> (%9007:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=564)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=564, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), 
outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=564, solved=0), )] (%9007:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=564)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%9008:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=564)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=564, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=564, solved=0), )] (%9004:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=564)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%9009:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=564)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: 
Int32), uuid=564, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=564, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=564, solved=0), )] (%9009:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=564)], %9008:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=564)]) -> (%9010:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=564)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=566, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=566, solved=0), )] (%9005:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=566)]) -> (%9005:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=566)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=566, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 
65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=566, solved=0), )] (%9005:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=566)]) -> (%9005:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=566)]) + linalg.CPU.NegOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=566, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=566, solved=0), )] (%9005:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=566)]) -> (%9011:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=566)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=566, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=566, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=566, solved=0), )] (%9011:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), 
uuid=566)], %9005:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=566)]) -> (%9012:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=566)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=566, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=566, solved=0), )] (%9012:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=566)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%9013:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=566)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=566, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=566, solved=0), )] 
(%9005:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=566)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%9014:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=566)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=566, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=566, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=566, solved=0), )] (%9014:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=566)], %9013:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=566)]) -> (%9015:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=566)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=566, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=568, solved=0), )] 
(%9015:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=566)]) -> (%9016:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=568)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=568, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=569, solved=0), )] (%9016:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=568)]) -> (%9017:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=569)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=569, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=569, solved=0), )] (%9017:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=569)]) -> (%9019:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=569)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=563, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=570, solved=0), )] (%9003:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=563)]) -> (%9020:tensor<[1, 8, 32, 
128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=570)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=570, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=571, solved=0), )] (%9020:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=570)]) -> (%9021:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=571)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=17, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=569, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=17, solved=0), )] (%8236:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=17)], %9019:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=569)]) -> (%9023:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=17)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=45, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=571, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=45, solved=0), )] 
(%8237:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=45)], %9021:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=571)]) -> (%9024:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=45)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=17, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=17, solved=0), )] (%9023:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=17)]) -> (%9025:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=17)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=45, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=45, solved=0), )] (%9024:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=45)]) -> (%9026:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=45)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, 
scale_type: Float32, zero_point_type: Int32), uuid=564, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=17, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=572, solved=0), )] (%9010:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=564)], %9025:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=17)]) -> (%9027:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=572)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=572, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=573, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=572, solved=0), )] (%9027:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=572)], %9028:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=573), constant:[0.088388346]]) -> (%9029:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, 
quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=572)]) + linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=572, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=574, solved=0), )] (%9029:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=572)]) -> (%9030:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=574)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=574, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=575, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=574, solved=0), )] (%9030:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=574)], %9031:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=575), constant:[-20]]) -> (%9032:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=574)]) + 
linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2, solved=0), inputs_1:QuantSpec(Raw(type: UInt16), uuid=576, solved=0), outputs_0:QuantSpec(Raw(type: UInt8), uuid=577, solved=0), )] (%8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %9033:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=576), constant:[0]]) -> (%9034:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=577)]) + linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=577, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=572, solved=0), inputs_2:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=574, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=574, solved=0), )] (%9034:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=577)], %9029:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=572)], %9032:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=574)]) -> (%9035:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=574)]) + linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), 
uuid=574, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=578, solved=0), )] (%9035:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=574)]) -> (%9036:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=578)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=578, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=45, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=579, solved=0), )] (%9036:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=578)], %9026:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=45)]) -> (%9037:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=579)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=579, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=579, solved=0), )] (%9037:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=579)]) -> (%9038:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=579)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=579, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=579, solved=0), )] (%9038:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=579)]) -> (%9038:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=579)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=579, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=581, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=580, solved=0))] (%9038:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=579)]) -> (%9039:tensor<[1, 
32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=581)]) + cf.ReturnOp (%9039:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=581)], %9019:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=569)], %9021:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=571)]) -> () } } graph.SubGraphOp @model.layers.14.mlp [using_qnn:true, symbol:model.layers.14.mlp] { - (%8684:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=567)]) -> (%8689:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=575)]) { - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=567), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=570), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=569))] (%8684:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=567)]) -> (%8685:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: 
Int16, scale_type: Float32), uuid=570)]) - linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=570), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=571), )] (%8685:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=570)]) -> (%8686:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=571)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=567), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=573), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=572))] (%8684:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=567)]) -> (%8687:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=573)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=571), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=573), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=571), )] (%8686:tensor<[1, 32, 6144], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=571)], %8687:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=573)]) -> (%8688:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=571)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=571), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=575), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=574))] (%8688:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=571)]) -> (%8689:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=575)]) - cf.ReturnOp (%8689:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=575)]) -> () + (%9041:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=582)]) -> (%9047:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=590)]) { + linalg.CPU.LinearOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=582, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=585, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=584, solved=0))] (%9041:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=582)]) -> (%9042:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=585)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=582, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=587, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=586, solved=0))] (%9041:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=582)]) -> (%9043:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=587)]) + linalg.CPU.SigmoidOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, 
quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=587, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=588, solved=0), )] (%9043:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=587)]) -> (%9044:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=588)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=587, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=588, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=587, solved=0), )] (%9043:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=587)], %9044:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=588)]) -> (%9045:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=587)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=587, solved=0), 
inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=585, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=587, solved=0), )] (%9045:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=587)], %9042:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=585)]) -> (%9046:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=587)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=587, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=590, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=589, solved=0))] (%9046:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=587)]) -> (%9047:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=590)]) + cf.ReturnOp (%9047:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 
65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=590)]) -> () } } graph.SubGraphOp @model.layers.15 [using_qnn:true, symbol:model.layers.15] { - (%8690:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=575)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8045:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=18)], %8046:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=46)]) -> (%8731:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=609)], %8704:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=588)], %8706:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=590)]) { - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=575), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=576), 
weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=577))] (%8690:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=575)]) -> (%8691:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=576)]) - graph.CallGraphOp @model.layers.15.self_attn (%8691:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=576)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8045:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=18)], %8046:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=46)]) -> (%8723:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=600)], %8704:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=588)], %8706:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, 
quant_to_type: Int8, scale_type: Float32), uuid=590)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=600), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=575), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=600), )] (%8723:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=600)], %8690:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=575)]) -> (%8724:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=600)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=600), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=601), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=602))] (%8724:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=600)]) -> (%8725:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=601)]) - graph.CallGraphOp @model.layers.15.mlp (%8725:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, 
scale_type: Float32), uuid=601)]) -> (%8730:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=609)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=609), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=600), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=609), )] (%8730:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=609)], %8724:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=600)]) -> (%8731:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=609)]) - cf.ReturnOp (%8731:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=609)], %8704:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=588)], %8706:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=590)]) -> () + (%9048:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8238:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=18)], %8239:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=46)]) -> (%9100:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %9071:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=604)], %9073:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=606)]) { + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=591, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=592, solved=0))] (%9048:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=60)]) -> (%9049:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=591)]) + graph.CallGraphOp @model.layers.15.self_attn (%9049:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=591)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8238:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=18)], %8239:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=46)]) -> (%9091:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=616)], %9071:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=604)], %9073:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=606)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=616, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%9048:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %9091:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=616)]) -> (%9092:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=617, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=618, solved=0))] (%9092:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%9093:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=617)]) + graph.CallGraphOp @model.layers.15.mlp (%9093:tensor<[1, 32, 2048], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=617)]) -> (%9099:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=625)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=625, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%9092:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %9099:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=625)]) -> (%9100:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + cf.ReturnOp (%9100:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %9071:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=604)], %9073:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: 
UInt8, scale_type: Float32), uuid=606)]) -> () } } graph.SubGraphOp @model.layers.15.self_attn [using_qnn:true, symbol:model.layers.15.self_attn] { - (%8691:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=576)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8045:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=18)], %8046:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=46)]) -> (%8723:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=600)], %8704:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=588)], %8706:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=590)]) { - linalg.CPU.LinearOp (%8691:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=576)]) -> (%8692:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=582)]) - linalg.CPU.LinearOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=576), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=579), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=578))] (%8691:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=576)]) -> (%8693:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=579)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=576), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=581), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=580))] (%8691:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=576)]) -> (%8694:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=581)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=582), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=582), )] (%8692:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=582)]) -> (%8692:tensor<[1, 32, 16, 128], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=582)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=582), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=582), )] (%8692:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=582)]) -> (%8695:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=582)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=579), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=579), )] (%8693:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=579)]) -> (%8693:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=579)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=579), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=579), )] (%8693:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=579)]) -> (%8696:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=579)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=581), 
outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=581), )] (%8694:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=581)]) -> (%8694:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=581)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=581), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=581), )] (%8694:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=581)]) -> (%8697:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=581)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=582), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=583), weight_weight:QuantSpec(Raw(type: Int16PerTensor), uuid=584))] (%8695:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=582)]) -> (%8698:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=583)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=579), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=585), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=586))] (%8696:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=579)]) -> (%8699:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=585)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=583), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=583), )] (%8698:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=583)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%8700:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=583)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=585), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=585), )] (%8699:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=585)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%8701:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=585)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=585), outputs_0:QuantSpec(Raw(type: Float16), uuid=587), )] (%8701:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=585)]) -> (%8702:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=587)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=587), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=588), )] (%8702:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=587)]) -> (%8703:tensor<[1, 8, 32, 128], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=588)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=588), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=588), )] (%8703:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=588)]) -> (%8704:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=588)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=581), outputs_0:QuantSpec(Raw(type: Float16), uuid=589), )] (%8697:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=581)]) -> (%8705:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=589)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=589), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=590), )] (%8705:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=589)]) -> (%8706:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=590)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=18), 
inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=588), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=18), )] (%8045:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=18)], %8704:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=588)]) -> (%8707:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=18)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=46), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=590), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=46), )] (%8046:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=46)], %8706:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=590)]) -> (%8708:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=46)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=18), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=18), )] 
(%8707:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=18)]) -> (%8709:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=18)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=46), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=46), )] (%8708:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=46)]) -> (%8710:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=46)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=583), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=18), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=591), )] (%8700:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=583)], %8709:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=18)]) -> (%8711:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=591)]) - linalg.CPU.MulOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=591), inputs_1:QuantSpec(Raw(type: Float32), uuid=592), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=591), )] (%8711:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=591)], %8712:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=592), constant:[0.088388346]]) -> (%8713:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=591)]) - linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=591), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=593), )] (%8713:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=591)]) -> (%8714:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=593)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=593), inputs_1:QuantSpec(Raw(type: Int16), uuid=594), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=593), )] (%8714:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: 
Float32), uuid=593)], %8715:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=594), constant:[-20]]) -> (%8716:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=593)]) - linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=595), outputs_0:QuantSpec(Raw(type: UInt8), uuid=596), )] (%8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8717:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=595), constant:[0.953125]]) -> (%8718:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=596)]) - linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=596), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=591), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=593), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=593), )] (%8718:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=596)], %8713:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=591)], %8716:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=593)]) -> (%8719:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=593)]) - linalg.CPU.SoftmaxOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=593), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=597), )] (%8719:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=593)]) -> (%8720:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=597)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=597), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=46), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=598), )] (%8720:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=597)], %8710:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=46)]) -> (%8721:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=598)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=598), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=598), )] (%8721:tensor<[1, 16, 32, 128], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=598)]) -> (%8722:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=598)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=598), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=598), )] (%8722:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=598)]) -> (%8722:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=598)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=598), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=600), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=599))] (%8722:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=598)]) -> (%8723:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=600)]) - cf.ReturnOp (%8723:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=600)], %8704:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=588)], %8706:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=590)]) -> () + (%9049:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=591)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8238:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=18)], %8239:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=46)]) -> (%9091:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=616)], %9071:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=604)], %9073:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=606)]) { + linalg.CPU.LinearOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=591, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=594, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=593, solved=0))] (%9049:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=591)]) -> (%9050:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=594)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=591, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=596, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=595, solved=0))] (%9049:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=591)]) -> (%9051:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=596)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, 
quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=591, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=598, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=597, solved=0))] (%9049:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=591)]) -> (%9052:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=598)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=594, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=594, solved=0), )] (%9050:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=594)]) -> (%9050:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=594)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=594, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=594, solved=0), )] (%9050:tensor<[1, 32, 16, 
128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=594)]) -> (%9053:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=594)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=596, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=596, solved=0), )] (%9051:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=596)]) -> (%9051:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=596)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=596, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=596, solved=0), )] (%9051:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=596)]) -> (%9054:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=596)]) + linalg.CPU.ViewOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=598, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=598, solved=0), )] (%9052:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=598)]) -> (%9052:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=598)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=598, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=598, solved=0), )] (%9052:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=598)]) -> (%9055:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=598)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=594, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=599, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: 
Float32, zero_point_type: Int32), uuid=600, solved=0))] (%9053:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=594)]) -> (%9056:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=599)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=596, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=601, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=602, solved=0))] (%9054:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=596)]) -> (%9057:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=601)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), )] (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, 
quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), )] (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=599, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=599, solved=0), )] (%9056:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=599)]) -> (%9056:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=599)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=599, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=599, solved=0), )] (%9056:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 
65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=599)]) -> (%9056:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=599)]) + linalg.CPU.NegOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=599, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=599, solved=0), )] (%9056:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=599)]) -> (%9058:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=599)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=599, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=599, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=599, solved=0), )] (%9058:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=599)], %9056:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=599)]) -> 
(%9059:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=599)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=599, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=599, solved=0), )] (%9059:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=599)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%9060:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=599)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=599, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=599, solved=0), )] (%9056:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=599)], %8268:tensor<[1, 1, 
32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%9061:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=599)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=599, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=599, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=599, solved=0), )] (%9061:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=599)], %9060:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=599)]) -> (%9062:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=599)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=601, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=601, solved=0), )] (%9057:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: 
UInt16, scale_type: Float32, zero_point_type: Int32), uuid=601)]) -> (%9057:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=601)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=601, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=601, solved=0), )] (%9057:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=601)]) -> (%9057:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=601)]) + linalg.CPU.NegOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=601, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=601, solved=0), )] (%9057:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=601)]) -> (%9063:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=601)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: 
Int32), uuid=601, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=601, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=601, solved=0), )] (%9063:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=601)], %9057:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=601)]) -> (%9064:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=601)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=601, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=601, solved=0), )] (%9064:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=601)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%9065:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=601)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=601, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=601, solved=0), )] (%9057:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=601)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%9066:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=601)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=601, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=601, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=601, solved=0), )] (%9066:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=601)], %9065:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=601)]) -> (%9067:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=601)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=601, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=603, solved=0), )] (%9067:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=601)]) -> (%9068:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=603)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=603, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=604, solved=0), )] (%9068:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=603)]) -> (%9069:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=604)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=604, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=604, solved=0), )] (%9069:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=604)]) -> (%9071:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), 
uuid=604)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=598, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=605, solved=0), )] (%9055:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=598)]) -> (%9072:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=605)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=605, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=606, solved=0), )] (%9072:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=605)]) -> (%9073:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=606)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=18, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=604, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=18, solved=0), )] (%8238:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=18)], %9071:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=604)]) -> (%9075:tensor<[1, 8, 128, 1024], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=18)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=46, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=606, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=46, solved=0), )] (%8239:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=46)], %9073:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=606)]) -> (%9076:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=46)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=18, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=18, solved=0), )] (%9075:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=18)]) -> (%9077:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=18)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=46, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, 
quant_to_type: UInt8, scale_type: Float32), uuid=46, solved=0), )] (%9076:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=46)]) -> (%9078:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=46)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=599, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=18, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=607, solved=0), )] (%9062:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=599)], %9077:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=18)]) -> (%9079:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=607)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=607, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=608, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), 
uuid=607, solved=0), )] (%9079:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=607)], %9080:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=608), constant:[0.088388346]]) -> (%9081:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=607)]) + linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=607, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=609, solved=0), )] (%9081:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=607)]) -> (%9082:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=609)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=609, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=610, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=609, solved=0), )] (%9082:tensor<[1, 16, 32, 1], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=609)], %9083:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=610), constant:[-20]]) -> (%9084:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=609)]) + linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2, solved=0), inputs_1:QuantSpec(Raw(type: UInt16), uuid=611, solved=0), outputs_0:QuantSpec(Raw(type: UInt8), uuid=612, solved=0), )] (%8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %9085:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=611), constant:[0]]) -> (%9086:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=612)]) + linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=612, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=607, solved=0), inputs_2:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=609, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=609, solved=0), )] (%9086:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=612)], %9081:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=607)], %9084:tensor<[1, 16, 32, 1], 
UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=609)]) -> (%9087:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=609)]) + linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=609, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=613, solved=0), )] (%9087:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=609)]) -> (%9088:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=613)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=613, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=46, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=614, solved=0), )] (%9088:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=613)], %9078:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, 
scale_type: Float32), uuid=46)]) -> (%9089:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=614)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=614, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=614, solved=0), )] (%9089:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=614)]) -> (%9090:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=614)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=614, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=614, solved=0), )] (%9090:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=614)]) -> (%9090:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=614)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=614, solved=0), 
outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=616, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=615, solved=0))] (%9090:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=614)]) -> (%9091:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=616)]) + cf.ReturnOp (%9091:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=616)], %9071:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=604)], %9073:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=606)]) -> () } } graph.SubGraphOp @model.layers.15.mlp [using_qnn:true, symbol:model.layers.15.mlp] { - (%8725:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=601)]) -> (%8730:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=609)]) { - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=601), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=604), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=603))] (%8725:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=601)]) -> (%8726:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=604)]) - linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=604), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=605), )] (%8726:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=604)]) -> (%8727:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=605)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=601), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=607), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=606))] (%8725:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=601)]) -> (%8728:tensor<[1, 32, 6144], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=607)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=605), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=607), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=605), )] (%8727:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=605)], %8728:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=607)]) -> (%8729:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=605)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=605), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=609), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=608))] (%8729:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=605)]) -> (%8730:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=609)]) - cf.ReturnOp (%8730:tensor<[1, 32, 2048], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=609)]) -> () + (%9093:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=617)]) -> (%9099:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=625)]) { + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=617, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=620, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=619, solved=0))] (%9093:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=617)]) -> (%9094:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=620)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=617, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=622, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, 
scale_1_type: Float32), uuid=621, solved=0))] (%9093:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=617)]) -> (%9095:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=622)]) + linalg.CPU.SigmoidOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=622, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=623, solved=0), )] (%9095:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=622)]) -> (%9096:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=623)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=622, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=623, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=622, solved=0), )] (%9095:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=622)], %9096:tensor<[1, 32, 6144], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=623)]) -> (%9097:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=622)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=622, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=620, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=622, solved=0), )] (%9097:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=622)], %9094:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=620)]) -> (%9098:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=622)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=622, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=625, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: 
Float32), uuid=624, solved=0))] (%9098:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=622)]) -> (%9099:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=625)]) + cf.ReturnOp (%9099:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=625)]) -> () } } graph.SubGraphOp @model.layers.16 [using_qnn:true, symbol:model.layers.16] { - (%8731:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=609)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8047:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=19)], %8048:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=47)]) -> (%8772:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=643)], %8745:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 
-128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=622)], %8747:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=624)]) { - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=609), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=610), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=611))] (%8731:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=609)]) -> (%8732:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=610)]) - graph.CallGraphOp @model.layers.16.self_attn (%8732:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=610)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8047:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=19)], %8048:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, 
quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=47)]) -> (%8764:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=634)], %8745:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=622)], %8747:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=624)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=634), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=609), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=634), )] (%8764:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=634)], %8731:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=609)]) -> (%8765:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=634)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=634), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=635), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=636))] 
(%8765:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=634)]) -> (%8766:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=635)]) - graph.CallGraphOp @model.layers.16.mlp (%8766:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=635)]) -> (%8771:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=643)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=643), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=634), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=643), )] (%8771:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=643)], %8765:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=634)]) -> (%8772:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=643)]) - cf.ReturnOp (%8772:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=643)], %8745:tensor<[1, 8, 128, 32], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=622)], %8747:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=624)]) -> () + (%9100:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8240:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=19)], %8241:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=47)]) -> (%9152:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %9123:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=639)], %9125:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=641)]) { + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, 
scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=626, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=627, solved=0))] (%9100:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%9101:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=626)]) + graph.CallGraphOp @model.layers.16.self_attn (%9101:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=626)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8240:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=19)], %8241:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=47)]) -> (%9143:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=651)], %9123:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=639)], %9125:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=641)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=651, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%9100:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %9143:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=651)]) -> (%9144:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=652, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: 
Int32), uuid=653, solved=0))] (%9144:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%9145:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=652)]) + graph.CallGraphOp @model.layers.16.mlp (%9145:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=652)]) -> (%9151:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=660)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=660, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%9144:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %9151:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=660)]) -> (%9152:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) 
+ cf.ReturnOp (%9152:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %9123:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=639)], %9125:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=641)]) -> () } } graph.SubGraphOp @model.layers.16.self_attn [using_qnn:true, symbol:model.layers.16.self_attn] { - (%8732:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=610)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8047:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=19)], %8048:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=47)]) -> (%8764:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=634)], %8745:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=622)], 
%8747:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=624)]) { - linalg.CPU.LinearOp (%8732:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=610)]) -> (%8733:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=616)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=610), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=613), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=612))] (%8732:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=610)]) -> (%8734:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=613)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=610), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=615), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=614))] (%8732:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=610)]) -> (%8735:tensor<[1, 
32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=615)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=616), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=616), )] (%8733:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=616)]) -> (%8733:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=616)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=616), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=616), )] (%8733:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=616)]) -> (%8736:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=616)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=613), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=613), )] (%8734:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=613)]) -> (%8734:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=613)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=613), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=613), )] (%8734:tensor<[1, 32, 8, 128], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=613)]) -> (%8737:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=613)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=615), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=615), )] (%8735:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=615)]) -> (%8735:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=615)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=615), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=615), )] (%8735:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=615)]) -> (%8738:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=615)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=616), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=617), weight_weight:QuantSpec(Raw(type: Int16PerTensor), uuid=618))] (%8736:tensor<[1, 16, 32, 128], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=616)]) -> (%8739:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=617)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=613), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=619), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=620))] (%8737:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=613)]) -> (%8740:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=619)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=617), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=617), )] (%8739:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=617)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%8741:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=617)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=619), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=619), )] (%8740:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=619)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%8742:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=619)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=619), outputs_0:QuantSpec(Raw(type: Float16), uuid=621), )] (%8742:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=619)]) -> (%8743:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=621)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=621), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=622), )] (%8743:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=621)]) -> (%8744:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=622)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=622), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=622), )] (%8744:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=622)]) -> (%8745:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=622)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=615), outputs_0:QuantSpec(Raw(type: Float16), uuid=623), )] (%8738:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=615)]) -> (%8746:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=623)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=623), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), 
uuid=624), )] (%8746:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=623)]) -> (%8747:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=624)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=19), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=622), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=19), )] (%8047:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=19)], %8745:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=622)]) -> (%8748:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=19)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=47), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=624), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=47), )] (%8048:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=47)], %8747:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=624)]) -> 
(%8749:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=47)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=19), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=19), )] (%8748:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=19)]) -> (%8750:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=19)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=47), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=47), )] (%8749:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=47)]) -> (%8751:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=47)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=617), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=19), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=625), )] (%8741:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 
-32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=617)], %8750:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=19)]) -> (%8752:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=625)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=625), inputs_1:QuantSpec(Raw(type: Float32), uuid=626), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=625), )] (%8752:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=625)], %8753:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=626), constant:[0.088388346]]) -> (%8754:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=625)]) - linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=625), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=627), )] (%8754:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=625)]) -> (%8755:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=627)]) - linalg.CPU.AddOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=627), inputs_1:QuantSpec(Raw(type: Int16), uuid=628), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=627), )] (%8755:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=627)], %8756:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=628), constant:[-20]]) -> (%8757:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=627)]) - linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=629), outputs_0:QuantSpec(Raw(type: UInt8), uuid=630), )] (%8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8758:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=629), constant:[0.118652344]]) -> (%8759:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=630)]) - linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=630), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=625), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=627), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=627), )] (%8759:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=630)], %8754:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=625)], %8757:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=627)]) -> (%8760:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=627)]) - linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=627), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=631), )] (%8760:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=627)]) -> (%8761:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=631)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=631), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=47), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=632), )] (%8761:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=631)], %8751:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=47)]) -> (%8762:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 
32767, quant_to_type: Int16, scale_type: Float32), uuid=632)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=632), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=632), )] (%8762:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=632)]) -> (%8763:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=632)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=632), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=632), )] (%8763:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=632)]) -> (%8763:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=632)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=632), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=634), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=633))] (%8763:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=632)]) -> (%8764:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=634)]) - cf.ReturnOp (%8764:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=634)], %8745:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=622)], %8747:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=624)]) -> () + (%9101:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=626)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8240:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=19)], %8241:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=47)]) -> (%9143:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=651)], 
%9123:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=639)], %9125:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=641)]) { + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=626, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=629, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=628, solved=0))] (%9101:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=626)]) -> (%9102:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=629)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=626, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=631, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=630, solved=0))] (%9101:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: 
Float32, zero_point_type: Int32), uuid=626)]) -> (%9103:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=631)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=626, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=633, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=632, solved=0))] (%9101:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=626)]) -> (%9104:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=633)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=629, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=629, solved=0), )] (%9102:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=629)]) -> (%9102:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=629)]) + linalg.CPU.TransposeOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=629, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=629, solved=0), )] (%9102:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=629)]) -> (%9105:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=629)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=631, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=631, solved=0), )] (%9103:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=631)]) -> (%9103:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=631)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=631, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=631, solved=0), )] (%9103:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, 
quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=631)]) -> (%9106:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=631)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=633, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=633, solved=0), )] (%9104:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=633)]) -> (%9104:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=633)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=633, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=633, solved=0), )] (%9104:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=633)]) -> (%9107:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=633)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, 
scale_type: Float32, zero_point_type: Int32), uuid=629, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=634, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=635, solved=0))] (%9105:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=629)]) -> (%9108:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=634)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=631, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=636, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=637, solved=0))] (%9106:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=631)]) -> (%9109:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=636)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, 
scale_type: Float32), uuid=65, solved=0), )] (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), )] (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=634, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=634, solved=0), )] (%9108:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=634)]) -> (%9108:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=634)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: 
Float32, zero_point_type: Int32), uuid=634, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=634, solved=0), )] (%9108:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=634)]) -> (%9108:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=634)]) + linalg.CPU.NegOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=634, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=634, solved=0), )] (%9108:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=634)]) -> (%9110:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=634)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=634, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=634, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=634, solved=0), )] (%9110:tensor<[1, 16, 32, 64], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=634)], %9108:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=634)]) -> (%9111:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=634)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=634, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=634, solved=0), )] (%9111:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=634)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%9112:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=634)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=634, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), 
outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=634, solved=0), )] (%9108:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=634)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%9113:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=634)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=634, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=634, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=634, solved=0), )] (%9113:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=634)], %9112:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=634)]) -> (%9114:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=634)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, 
quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=636, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=636, solved=0), )] (%9109:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=636)]) -> (%9109:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=636)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=636, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=636, solved=0), )] (%9109:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=636)]) -> (%9109:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=636)]) + linalg.CPU.NegOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=636, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=636, solved=0), )] (%9109:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=636)]) 
-> (%9115:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=636)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=636, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=636, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=636, solved=0), )] (%9115:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=636)], %9109:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=636)]) -> (%9116:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=636)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=636, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=636, solved=0), )] (%9116:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=636)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%9117:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=636)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=636, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=636, solved=0), )] (%9109:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=636)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%9118:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=636)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=636, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=636, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=636, 
solved=0), )] (%9118:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=636)], %9117:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=636)]) -> (%9119:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=636)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=636, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=638, solved=0), )] (%9119:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=636)]) -> (%9120:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=638)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=638, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=639, solved=0), )] (%9120:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=638)]) -> (%9121:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=639)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=639, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, 
scale_type: Float32), uuid=639, solved=0), )] (%9121:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=639)]) -> (%9123:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=639)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=633, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=640, solved=0), )] (%9107:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=633)]) -> (%9124:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=640)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=640, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=641, solved=0), )] (%9124:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=640)]) -> (%9125:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=641)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=19, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=639, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=19, solved=0), )] (%8240:tensor<[1, 8, 128, 992], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=19)], %9123:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=639)]) -> (%9127:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=19)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=47, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=641, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=47, solved=0), )] (%8241:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=47)], %9125:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=641)]) -> (%9128:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=47)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=19, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=19, solved=0), )] (%9127:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=19)]) -> (%9129:tensor<[1, 16, 128, 1024], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=19)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=47, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=47, solved=0), )] (%9128:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=47)]) -> (%9130:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=47)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=634, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=19, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=642, solved=0), )] (%9114:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=634)], %9129:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=19)]) -> (%9131:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=642)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, 
quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=642, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=643, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=642, solved=0), )] (%9131:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=642)], %9132:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=643), constant:[0.088388346]]) -> (%9133:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=642)]) + linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=642, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=644, solved=0), )] (%9133:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=642)]) -> (%9134:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=644)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=644, 
solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=645, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=644, solved=0), )] (%9134:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=644)], %9135:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=645), constant:[-20]]) -> (%9136:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=644)]) + linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2, solved=0), inputs_1:QuantSpec(Raw(type: UInt16), uuid=646, solved=0), outputs_0:QuantSpec(Raw(type: UInt8), uuid=647, solved=0), )] (%8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %9137:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=646), constant:[0]]) -> (%9138:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=647)]) + linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=647, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=642, solved=0), inputs_2:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=644, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=644, solved=0), )] (%9138:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=647)], %9133:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=642)], %9136:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=644)]) -> (%9139:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=644)]) + linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=644, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=648, solved=0), )] (%9139:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=644)]) -> (%9140:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=648)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=648, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=47, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: 
Int32), uuid=649, solved=0), )] (%9140:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=648)], %9130:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=47)]) -> (%9141:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=649)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=649, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=649, solved=0), )] (%9141:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=649)]) -> (%9142:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=649)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=649, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=649, solved=0), )] (%9142:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=649)]) -> (%9142:tensor<[1, 32, 2048], 
UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=649)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=649, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=651, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=650, solved=0))] (%9142:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=649)]) -> (%9143:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=651)]) + cf.ReturnOp (%9143:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=651)], %9123:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=639)], %9125:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=641)]) -> () } } graph.SubGraphOp @model.layers.16.mlp [using_qnn:true, symbol:model.layers.16.mlp] { - (%8766:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=635)]) -> (%8771:tensor<[1, 32, 2048], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=643)]) { - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=635), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=638), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=637))] (%8766:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=635)]) -> (%8767:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=638)]) - linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=638), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=639), )] (%8767:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=638)]) -> (%8768:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=639)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=635), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=641), weight_weight:QuantSpec(LPBQ(quant_min: -8, 
quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=640))] (%8766:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=635)]) -> (%8769:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=641)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=639), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=641), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=639), )] (%8768:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=639)], %8769:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=641)]) -> (%8770:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=639)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=639), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=643), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=642))] (%8770:tensor<[1, 32, 6144], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=639)]) -> (%8771:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=643)]) - cf.ReturnOp (%8771:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=643)]) -> () + (%9145:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=652)]) -> (%9151:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=660)]) { + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=652, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=655, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=654, solved=0))] (%9145:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=652)]) -> (%9146:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=655)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, 
quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=652, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=657, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=656, solved=0))] (%9145:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=652)]) -> (%9147:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=657)]) + linalg.CPU.SigmoidOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=657, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=658, solved=0), )] (%9147:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=657)]) -> (%9148:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=658)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=657, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=658, solved=0), 
outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=657, solved=0), )] (%9147:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=657)], %9148:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=658)]) -> (%9149:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=657)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=657, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=655, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=657, solved=0), )] (%9149:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=657)], %9146:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=655)]) -> (%9150:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=657)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 
65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=657, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=660, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=659, solved=0))] (%9150:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=657)]) -> (%9151:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=660)]) + cf.ReturnOp (%9151:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=660)]) -> () } } graph.SubGraphOp @model.layers.17 [using_qnn:true, symbol:model.layers.17] { - (%8772:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=643)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8049:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=20)], %8050:tensor<[1, 8, 992, 128], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=48)]) -> (%8813:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=677)], %8786:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=656)], %8788:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=658)]) { - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=643), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=644), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=645))] (%8772:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=643)]) -> (%8773:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=644)]) - graph.CallGraphOp @model.layers.17.self_attn (%8773:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=644)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, 
scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8049:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=20)], %8050:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=48)]) -> (%8805:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=668)], %8786:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=656)], %8788:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=658)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=668), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=643), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=668), )] (%8805:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=668)], %8772:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=643)]) -> (%8806:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=668)]) - linalg.CPU.RMSNormOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=668), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=669), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=670))] (%8806:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=668)]) -> (%8807:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=669)]) - graph.CallGraphOp @model.layers.17.mlp (%8807:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=669)]) -> (%8812:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=677)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=677), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=668), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=677), )] (%8812:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=677)], %8806:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=668)]) -> (%8813:tensor<[1, 32, 2048], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=677)]) - cf.ReturnOp (%8813:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=677)], %8786:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=656)], %8788:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=658)]) -> () + (%9152:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8242:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=20)], %8243:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=48)]) -> (%9204:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %9175:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, 
quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=674)], %9177:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=676)]) { + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=661, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=662, solved=0))] (%9152:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%9153:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=661)]) + graph.CallGraphOp @model.layers.17.self_attn (%9153:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=661)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8242:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, 
quant_to_type: Int8, scale_type: Float32), uuid=20)], %8243:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=48)]) -> (%9195:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=686)], %9175:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=674)], %9177:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=676)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=686, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%9152:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %9195:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=686)]) -> (%9196:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, 
quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=687, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=688, solved=0))] (%9196:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%9197:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=687)]) + graph.CallGraphOp @model.layers.17.mlp (%9197:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=687)]) -> (%9203:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=695)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=695, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%9196:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], 
%9203:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=695)]) -> (%9204:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + cf.ReturnOp (%9204:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %9175:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=674)], %9177:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=676)]) -> () } } graph.SubGraphOp @model.layers.17.self_attn [using_qnn:true, symbol:model.layers.17.self_attn] { - (%8773:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=644)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8049:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=20)], %8050:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, 
quant_to_type: Int8, scale_type: Float32), uuid=48)]) -> (%8805:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=668)], %8786:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=656)], %8788:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=658)]) { - linalg.CPU.LinearOp (%8773:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=644)]) -> (%8774:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=650)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=644), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=647), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=646))] (%8773:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=644)]) -> (%8775:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=647)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=644), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: 
Int16, scale_type: Float32), uuid=649), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=648))] (%8773:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=644)]) -> (%8776:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=649)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=650), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=650), )] (%8774:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=650)]) -> (%8774:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=650)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=650), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=650), )] (%8774:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=650)]) -> (%8777:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=650)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=647), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=647), )] (%8775:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=647)]) -> (%8775:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=647)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=647), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=647), )] (%8775:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=647)]) -> (%8778:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=647)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=649), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=649), )] (%8776:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=649)]) -> (%8776:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=649)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=649), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=649), )] (%8776:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=649)]) -> (%8779:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=649)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=650), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=651), weight_weight:QuantSpec(Raw(type: Int16PerTensor), uuid=652))] (%8777:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=650)]) -> (%8780:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=651)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=647), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=653), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=654))] (%8778:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=647)]) -> (%8781:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=653)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=651), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: 
Float32), uuid=651), )] (%8780:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=651)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%8782:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=651)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=653), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=653), )] (%8781:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=653)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%8783:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=653)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=653), outputs_0:QuantSpec(Raw(type: Float16), uuid=655), )] (%8783:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=653)]) -> (%8784:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=655)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=655), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=656), )] (%8784:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=655)]) -> (%8785:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=656)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=656), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=656), )] (%8785:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=656)]) -> (%8786:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=656)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=649), outputs_0:QuantSpec(Raw(type: Float16), uuid=657), )] (%8779:tensor<[1, 8, 32, 128], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=649)]) -> (%8787:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=657)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=657), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=658), )] (%8787:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=657)]) -> (%8788:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=658)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=20), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=656), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=20), )] (%8049:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=20)], %8786:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=656)]) -> (%8789:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=20)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=48), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=658), 
outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=48), )] (%8050:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=48)], %8788:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=658)]) -> (%8790:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=48)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=20), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=20), )] (%8789:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=20)]) -> (%8791:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=20)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=48), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=48), )] (%8790:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=48)]) -> (%8792:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=48)]) - linalg.CPU.MatMulOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=651), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=20), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=659), )] (%8782:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=651)], %8791:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=20)]) -> (%8793:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=659)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=659), inputs_1:QuantSpec(Raw(type: Float32), uuid=660), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=659), )] (%8793:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=659)], %8794:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=660), constant:[0.088388346]]) -> (%8795:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=659)]) - linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=659), outputs_0:QuantSpec(SymPerTensor(quant_min: 
-32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=661), )] (%8795:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=659)]) -> (%8796:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=661)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=661), inputs_1:QuantSpec(Raw(type: Int16), uuid=662), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=661), )] (%8796:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=661)], %8797:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=662), constant:[-20]]) -> (%8798:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=661)]) - linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=663), outputs_0:QuantSpec(Raw(type: UInt8), uuid=664), )] (%8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8799:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=663), constant:[-0.99609375]]) -> (%8800:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=664)]) - linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=664), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=659), 
inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=661), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=661), )] (%8800:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=664)], %8795:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=659)], %8798:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=661)]) -> (%8801:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=661)]) - linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=661), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=665), )] (%8801:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=661)]) -> (%8802:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=665)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=665), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=48), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=666), )] 
(%8802:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=665)], %8792:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=48)]) -> (%8803:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=666)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=666), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=666), )] (%8803:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=666)]) -> (%8804:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=666)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=666), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=666), )] (%8804:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=666)]) -> (%8804:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=666)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 
32767, quant_to_type: Int16, scale_type: Float32), uuid=666), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=668), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=667))] (%8804:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=666)]) -> (%8805:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=668)]) - cf.ReturnOp (%8805:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=668)], %8786:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=656)], %8788:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=658)]) -> () + (%9153:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=661)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8242:tensor<[1, 8, 128, 992], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=20)], %8243:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=48)]) -> (%9195:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=686)], %9175:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=674)], %9177:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=676)]) { + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=661, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=664, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=663, solved=0))] (%9153:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=661)]) -> (%9154:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=664)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=661, 
solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=666, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=665, solved=0))] (%9153:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=661)]) -> (%9155:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=666)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=661, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=668, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=667, solved=0))] (%9153:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=661)]) -> (%9156:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=668)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=664, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, 
scale_type: Float32, zero_point_type: Int32), uuid=664, solved=0), )] (%9154:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=664)]) -> (%9154:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=664)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=664, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=664, solved=0), )] (%9154:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=664)]) -> (%9157:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=664)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=666, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=666, solved=0), )] (%9155:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=666)]) -> (%9155:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=666)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=666, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=666, solved=0), )] (%9155:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=666)]) -> (%9158:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=666)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=668, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=668, solved=0), )] (%9156:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=668)]) -> (%9156:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=668)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=668, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=668, solved=0), )] (%9156:tensor<[1, 32, 8, 128], 
UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=668)]) -> (%9159:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=668)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=664, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=669, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=670, solved=0))] (%9157:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=664)]) -> (%9160:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=669)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=666, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=671, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=672, solved=0))] (%9158:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: 
Float32, zero_point_type: Int32), uuid=666)]) -> (%9161:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=671)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), )] (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), )] (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=669, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=669, solved=0), )] (%9160:tensor<[1, 16, 32, 128], 
UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=669)]) -> (%9160:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=669)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=669, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=669, solved=0), )] (%9160:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=669)]) -> (%9160:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=669)]) + linalg.CPU.NegOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=669, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=669, solved=0), )] (%9160:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=669)]) -> (%9162:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=669)]) + linalg.CPU.ConcatOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=669, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=669, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=669, solved=0), )] (%9162:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=669)], %9160:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=669)]) -> (%9163:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=669)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=669, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=669, solved=0), )] (%9163:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=669)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> 
(%9164:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=669)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=669, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=669, solved=0), )] (%9160:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=669)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%9165:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=669)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=669, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=669, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=669, solved=0), )] (%9165:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), 
uuid=669)], %9164:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=669)]) -> (%9166:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=669)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=671, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=671, solved=0), )] (%9161:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=671)]) -> (%9161:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=671)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=671, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=671, solved=0), )] (%9161:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=671)]) -> (%9161:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=671)]) + linalg.CPU.NegOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=671, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=671, solved=0), )] (%9161:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=671)]) -> (%9167:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=671)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=671, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=671, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=671, solved=0), )] (%9167:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=671)], %9161:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=671)]) -> (%9168:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=671)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, 
quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=671, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=671, solved=0), )] (%9168:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=671)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%9169:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=671)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=671, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=671, solved=0), )] (%9161:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=671)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%9170:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: 
Float32, zero_point_type: Int32), uuid=671)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=671, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=671, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=671, solved=0), )] (%9170:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=671)], %9169:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=671)]) -> (%9171:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=671)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=671, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=673, solved=0), )] (%9171:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=671)]) -> (%9172:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=673)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=673, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=674, solved=0), )] 
(%9172:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=673)]) -> (%9173:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=674)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=674, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=674, solved=0), )] (%9173:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=674)]) -> (%9175:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=674)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=668, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=675, solved=0), )] (%9159:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=668)]) -> (%9176:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=675)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=675, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=676, solved=0), )] (%9176:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=675)]) -> (%9177:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, 
quant_to_type: UInt8, scale_type: Float32), uuid=676)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=20, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=674, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=20, solved=0), )] (%8242:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=20)], %9175:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=674)]) -> (%9179:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=20)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=48, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=676, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=48, solved=0), )] (%8243:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=48)], %9177:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=676)]) -> (%9180:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=48)]) + 
linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=20, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=20, solved=0), )] (%9179:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=20)]) -> (%9181:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=20)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=48, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=48, solved=0), )] (%9180:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=48)]) -> (%9182:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=48)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=669, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=20, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=677, solved=0), )] (%9166:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=669)], %9181:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=20)]) -> (%9183:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=677)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=677, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=678, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=677, solved=0), )] (%9183:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=677)], %9184:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=678), constant:[0.088388346]]) -> (%9185:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=677)]) + linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=677, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=679, solved=0), )] (%9185:tensor<[1, 16, 32, 1024], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=677)]) -> (%9186:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=679)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=679, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=680, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=679, solved=0), )] (%9186:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=679)], %9187:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=680), constant:[-20]]) -> (%9188:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=679)]) + linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2, solved=0), inputs_1:QuantSpec(Raw(type: UInt16), uuid=681, solved=0), outputs_0:QuantSpec(Raw(type: UInt8), uuid=682, solved=0), )] (%8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %9189:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=681), constant:[0]]) -> (%9190:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), 
uuid=682)]) + linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=682, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=677, solved=0), inputs_2:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=679, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=679, solved=0), )] (%9190:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=682)], %9185:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=677)], %9188:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=679)]) -> (%9191:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=679)]) + linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=679, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=683, solved=0), )] (%9191:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=679)]) -> (%9192:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, 
quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=683)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=683, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=48, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=684, solved=0), )] (%9192:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=683)], %9182:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=48)]) -> (%9193:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=684)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=684, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=684, solved=0), )] (%9193:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=684)]) -> (%9194:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=684)]) + linalg.CPU.ViewOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=684, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=684, solved=0), )] (%9194:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=684)]) -> (%9194:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=684)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=684, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=686, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=685, solved=0))] (%9194:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=684)]) -> (%9195:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=686)]) + cf.ReturnOp (%9195:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=686)], %9175:tensor<[1, 8, 128, 32], UInt8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=674)], %9177:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=676)]) -> () } } graph.SubGraphOp @model.layers.17.mlp [using_qnn:true, symbol:model.layers.17.mlp] { - (%8807:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=669)]) -> (%8812:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=677)]) { - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=669), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=672), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=671))] (%8807:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=669)]) -> (%8808:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=672)]) - linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=672), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=673), )] (%8808:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 
-32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=672)]) -> (%8809:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=673)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=669), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=675), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=674))] (%8807:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=669)]) -> (%8810:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=675)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=673), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=675), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=673), )] (%8809:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=673)], %8810:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=675)]) -> (%8811:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 
32767, quant_to_type: Int16, scale_type: Float32), uuid=673)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=673), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=677), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=676))] (%8811:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=673)]) -> (%8812:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=677)]) - cf.ReturnOp (%8812:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=677)]) -> () + (%9197:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=687)]) -> (%9203:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=695)]) { + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=687, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=690, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: 
UInt4, scale_1_type: Float32), uuid=689, solved=0))] (%9197:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=687)]) -> (%9198:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=690)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=687, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=692, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=691, solved=0))] (%9197:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=687)]) -> (%9199:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=692)]) + linalg.CPU.SigmoidOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=692, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=693, solved=0), )] (%9199:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=692)]) -> (%9200:tensor<[1, 
32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=693)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=692, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=693, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=692, solved=0), )] (%9199:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=692)], %9200:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=693)]) -> (%9201:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=692)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=692, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=690, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=692, solved=0), )] (%9201:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=692)], %9198:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=690)]) -> (%9202:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=692)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=692, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=695, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=694, solved=0))] (%9202:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=692)]) -> (%9203:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=695)]) + cf.ReturnOp (%9203:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=695)]) -> () } } graph.SubGraphOp @model.layers.18 [using_qnn:true, symbol:model.layers.18] { - (%8813:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=677)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8051:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=21)], %8052:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=49)]) -> (%8854:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=711)], %8827:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=690)], %8829:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=692)]) { - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=677), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=678), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=679))] (%8813:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=677)]) -> (%8814:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=678)]) 
- graph.CallGraphOp @model.layers.18.self_attn (%8814:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=678)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8051:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=21)], %8052:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=49)]) -> (%8846:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=702)], %8827:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=690)], %8829:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=692)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=702), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=677), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=702), )] (%8846:tensor<[1, 32, 
2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=702)], %8813:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=677)]) -> (%8847:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=702)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=702), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=703), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=704))] (%8847:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=702)]) -> (%8848:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=703)]) - graph.CallGraphOp @model.layers.18.mlp (%8848:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=703)]) -> (%8853:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=711)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=711), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=702), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=711), )] (%8853:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=711)], %8847:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=702)]) -> (%8854:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=711)]) - cf.ReturnOp (%8854:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=711)], %8827:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=690)], %8829:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=692)]) -> () + (%9204:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8244:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, 
quant_to_type: Int8, scale_type: Float32), uuid=21)], %8245:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=49)]) -> (%9256:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %9227:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=709)], %9229:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=711)]) { + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=696, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=697, solved=0))] (%9204:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%9205:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=696)]) + graph.CallGraphOp @model.layers.18.self_attn (%9205:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=696)], %8267:tensor<[1, 1, 32, 128], 
UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8244:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=21)], %8245:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=49)]) -> (%9247:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=721)], %9227:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=709)], %9229:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=711)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=721, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%9204:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=60)], %9247:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=721)]) -> (%9248:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=722, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=723, solved=0))] (%9248:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%9249:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=722)]) + graph.CallGraphOp @model.layers.18.mlp (%9249:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=722)]) -> (%9255:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=730)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), 
uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=730, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%9248:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %9255:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=730)]) -> (%9256:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + cf.ReturnOp (%9256:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %9227:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=709)], %9229:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=711)]) -> () } } graph.SubGraphOp @model.layers.18.self_attn [using_qnn:true, symbol:model.layers.18.self_attn] { - (%8814:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=678)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8051:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=21)], %8052:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=49)]) -> (%8846:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=702)], %8827:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=690)], %8829:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=692)]) { - linalg.CPU.LinearOp (%8814:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=678)]) -> (%8815:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=684)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=678), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=681), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=680))] (%8814:tensor<[1, 32, 2048], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=678)]) -> (%8816:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=681)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=678), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=683), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=682))] (%8814:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=678)]) -> (%8817:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=683)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=684), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=684), )] (%8815:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=684)]) -> (%8815:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=684)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=684), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=684), )] (%8815:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=684)]) -> (%8818:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=684)]) - linalg.CPU.ViewOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=681), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=681), )] (%8816:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=681)]) -> (%8816:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=681)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=681), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=681), )] (%8816:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=681)]) -> (%8819:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=681)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=683), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=683), )] (%8817:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=683)]) -> (%8817:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=683)]) - 
linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=683), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=683), )] (%8817:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=683)]) -> (%8820:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=683)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=684), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=685), weight_weight:QuantSpec(Raw(type: Int16PerTensor), uuid=686))] (%8818:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=684)]) -> (%8821:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=685)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=681), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=687), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=688))] (%8819:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=681)]) -> (%8822:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=687)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=685), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=685), )] (%8821:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=685)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%8823:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=685)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=687), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=687), )] (%8822:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=687)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%8824:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=687)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=687), outputs_0:QuantSpec(Raw(type: Float16), uuid=689), )] (%8824:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=687)]) -> (%8825:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=689)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=689), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=690), )] (%8825:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=689)]) -> (%8826:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=690)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=690), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=690), )] (%8826:tensor<[1, 8, 32, 128], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=690)]) -> (%8827:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=690)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=683), outputs_0:QuantSpec(Raw(type: Float16), uuid=691), )] (%8820:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=683)]) -> (%8828:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=691)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=691), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=692), )] (%8828:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=691)]) -> (%8829:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=692)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=21), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=690), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=21), )] (%8051:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=21)], %8827:tensor<[1, 8, 128, 32], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=690)]) -> (%8830:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=21)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=49), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=692), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=49), )] (%8052:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=49)], %8829:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=692)]) -> (%8831:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=49)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=21), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=21), )] (%8830:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=21)]) -> (%8832:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=21)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, 
quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=49), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=49), )] (%8831:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=49)]) -> (%8833:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=49)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=685), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=21), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=693), )] (%8823:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=685)], %8832:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=21)]) -> (%8834:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=693)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=693), inputs_1:QuantSpec(Raw(type: Float32), uuid=694), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=693), )] (%8834:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=693)], %8835:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=694), constant:[0.088388346]]) -> (%8836:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=693)]) - linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=693), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=695), )] (%8836:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=693)]) -> (%8837:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=695)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=695), inputs_1:QuantSpec(Raw(type: Int16), uuid=696), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=695), )] (%8837:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=695)], %8838:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=696), constant:[-20]]) -> (%8839:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=695)]) - linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=697), 
outputs_0:QuantSpec(Raw(type: UInt8), uuid=698), )] (%8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8840:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=697), constant:[0.24023438]]) -> (%8841:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=698)]) - linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=698), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=693), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=695), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=695), )] (%8841:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=698)], %8836:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=693)], %8839:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=695)]) -> (%8842:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=695)]) - linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=695), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=699), )] (%8842:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=695)]) -> (%8843:tensor<[1, 16, 32, 1024], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=699)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=699), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=49), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=700), )] (%8843:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=699)], %8833:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=49)]) -> (%8844:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=700)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=700), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=700), )] (%8844:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=700)]) -> (%8845:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=700)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=700), 
outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=700), )] (%8845:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=700)]) -> (%8845:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=700)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=700), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=702), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=701))] (%8845:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=700)]) -> (%8846:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=702)]) - cf.ReturnOp (%8846:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=702)], %8827:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=690)], %8829:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=692)]) -> () + (%9205:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, 
quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=696)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8244:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=21)], %8245:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=49)]) -> (%9247:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=721)], %9227:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=709)], %9229:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=711)]) { + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=696, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=699, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=698, solved=0))] (%9205:tensor<[1, 32, 
2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=696)]) -> (%9206:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=699)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=696, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=701, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=700, solved=0))] (%9205:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=696)]) -> (%9207:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=701)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=696, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=703, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=702, solved=0))] (%9205:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, 
quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=696)]) -> (%9208:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=703)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=699, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=699, solved=0), )] (%9206:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=699)]) -> (%9206:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=699)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=699, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=699, solved=0), )] (%9206:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=699)]) -> (%9209:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=699)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=701, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=701, solved=0), )] (%9207:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=701)]) -> (%9207:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=701)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=701, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=701, solved=0), )] (%9207:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=701)]) -> (%9210:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=701)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=703, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=703, solved=0), )] (%9208:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=703)]) -> (%9208:tensor<[1, 32, 8, 128], 
UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=703)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=703, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=703, solved=0), )] (%9208:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=703)]) -> (%9211:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=703)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=699, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=704, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=705, solved=0))] (%9209:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=699)]) -> (%9212:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=704)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 
65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=701, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=706, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=707, solved=0))] (%9210:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=701)]) -> (%9213:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=706)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), )] (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), )] (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> 
(%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=704, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=704, solved=0), )] (%9212:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=704)]) -> (%9212:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=704)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=704, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=704, solved=0), )] (%9212:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=704)]) -> (%9212:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=704)]) + linalg.CPU.NegOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=704, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, 
quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=704, solved=0), )] (%9212:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=704)]) -> (%9214:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=704)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=704, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=704, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=704, solved=0), )] (%9214:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=704)], %9212:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=704)]) -> (%9215:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=704)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=704, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), 
outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=704, solved=0), )] (%9215:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=704)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%9216:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=704)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=704, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=704, solved=0), )] (%9212:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=704)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%9217:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=704)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: 
Int32), uuid=704, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=704, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=704, solved=0), )] (%9217:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=704)], %9216:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=704)]) -> (%9218:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=704)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=706, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=706, solved=0), )] (%9213:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=706)]) -> (%9213:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=706)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=706, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 
65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=706, solved=0), )] (%9213:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=706)]) -> (%9213:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=706)]) + linalg.CPU.NegOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=706, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=706, solved=0), )] (%9213:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=706)]) -> (%9219:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=706)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=706, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=706, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=706, solved=0), )] (%9219:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), 
uuid=706)], %9213:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=706)]) -> (%9220:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=706)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=706, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=706, solved=0), )] (%9220:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=706)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%9221:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=706)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=706, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=706, solved=0), )] 
(%9213:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=706)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%9222:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=706)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=706, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=706, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=706, solved=0), )] (%9222:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=706)], %9221:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=706)]) -> (%9223:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=706)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=706, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=708, solved=0), )] 
(%9223:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=706)]) -> (%9224:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=708)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=708, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=709, solved=0), )] (%9224:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=708)]) -> (%9225:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=709)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=709, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=709, solved=0), )] (%9225:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=709)]) -> (%9227:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=709)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=703, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=710, solved=0), )] (%9211:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=703)]) -> (%9228:tensor<[1, 8, 32, 
128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=710)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=710, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=711, solved=0), )] (%9228:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=710)]) -> (%9229:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=711)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=21, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=709, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=21, solved=0), )] (%8244:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=21)], %9227:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=709)]) -> (%9231:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=21)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=49, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=711, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=49, solved=0), )] 
(%8245:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=49)], %9229:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=711)]) -> (%9232:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=49)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=21, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=21, solved=0), )] (%9231:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=21)]) -> (%9233:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=21)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=49, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=49, solved=0), )] (%9232:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=49)]) -> (%9234:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=49)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, 
scale_type: Float32, zero_point_type: Int32), uuid=704, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=21, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=712, solved=0), )] (%9218:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=704)], %9233:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=21)]) -> (%9235:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=712)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=712, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=713, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=712, solved=0), )] (%9235:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=712)], %9236:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=713), constant:[0.088388346]]) -> (%9237:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, 
quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=712)]) + linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=712, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=714, solved=0), )] (%9237:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=712)]) -> (%9238:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=714)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=714, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=715, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=714, solved=0), )] (%9238:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=714)], %9239:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=715), constant:[-20]]) -> (%9240:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=714)]) + 
linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2, solved=0), inputs_1:QuantSpec(Raw(type: UInt16), uuid=716, solved=0), outputs_0:QuantSpec(Raw(type: UInt8), uuid=717, solved=0), )] (%8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %9241:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=716), constant:[0]]) -> (%9242:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=717)]) + linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=717, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=712, solved=0), inputs_2:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=714, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=714, solved=0), )] (%9242:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=717)], %9237:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=712)], %9240:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=714)]) -> (%9243:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=714)]) + linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), 
uuid=714, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=718, solved=0), )] (%9243:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=714)]) -> (%9244:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=718)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=718, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=49, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=719, solved=0), )] (%9244:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=718)], %9234:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=49)]) -> (%9245:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=719)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=719, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=719, solved=0), )] (%9245:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=719)]) -> (%9246:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=719)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=719, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=719, solved=0), )] (%9246:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=719)]) -> (%9246:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=719)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=719, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=721, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=720, solved=0))] (%9246:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=719)]) -> (%9247:tensor<[1, 
32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=721)]) + cf.ReturnOp (%9247:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=721)], %9227:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=709)], %9229:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=711)]) -> () } } graph.SubGraphOp @model.layers.18.mlp [using_qnn:true, symbol:model.layers.18.mlp] { - (%8848:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=703)]) -> (%8853:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=711)]) { - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=703), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=706), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=705))] (%8848:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=703)]) -> (%8849:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: 
Int16, scale_type: Float32), uuid=706)]) - linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=706), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=707), )] (%8849:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=706)]) -> (%8850:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=707)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=703), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=709), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=708))] (%8848:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=703)]) -> (%8851:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=709)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=707), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=709), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=707), )] (%8850:tensor<[1, 32, 6144], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=707)], %8851:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=709)]) -> (%8852:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=707)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=707), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=711), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=710))] (%8852:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=707)]) -> (%8853:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=711)]) - cf.ReturnOp (%8853:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=711)]) -> () + (%9249:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=722)]) -> (%9255:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=730)]) { + linalg.CPU.LinearOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=722, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=725, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=724, solved=0))] (%9249:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=722)]) -> (%9250:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=725)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=722, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=727, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=726, solved=0))] (%9249:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=722)]) -> (%9251:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=727)]) + linalg.CPU.SigmoidOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, 
quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=727, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=728, solved=0), )] (%9251:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=727)]) -> (%9252:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=728)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=727, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=728, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=727, solved=0), )] (%9251:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=727)], %9252:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=728)]) -> (%9253:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=727)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=727, solved=0), 
inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=725, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=727, solved=0), )] (%9253:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=727)], %9250:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=725)]) -> (%9254:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=727)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=727, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=730, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=729, solved=0))] (%9254:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=727)]) -> (%9255:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=730)]) + cf.ReturnOp (%9255:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 
65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=730)]) -> () } } graph.SubGraphOp @model.layers.19 [using_qnn:true, symbol:model.layers.19] { - (%8854:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=711)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8053:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=22)], %8054:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=50)]) -> (%8895:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=745)], %8868:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=724)], %8870:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=726)]) { - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=711), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=712), 
weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=713))] (%8854:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=711)]) -> (%8855:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=712)]) - graph.CallGraphOp @model.layers.19.self_attn (%8855:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=712)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8053:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=22)], %8054:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=50)]) -> (%8887:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=736)], %8868:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=724)], %8870:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, 
quant_to_type: Int8, scale_type: Float32), uuid=726)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=736), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=711), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=736), )] (%8887:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=736)], %8854:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=711)]) -> (%8888:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=736)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=736), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=737), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=738))] (%8888:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=736)]) -> (%8889:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=737)]) - graph.CallGraphOp @model.layers.19.mlp (%8889:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, 
scale_type: Float32), uuid=737)]) -> (%8894:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=745)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=745), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=736), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=745), )] (%8894:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=745)], %8888:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=736)]) -> (%8895:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=745)]) - cf.ReturnOp (%8895:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=745)], %8868:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=724)], %8870:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=726)]) -> () + (%9256:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8246:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=22)], %8247:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=50)]) -> (%9308:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %9279:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=744)], %9281:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=746)]) { + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=731, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=732, solved=0))] (%9256:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=60)]) -> (%9257:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=731)]) + graph.CallGraphOp @model.layers.19.self_attn (%9257:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=731)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8246:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=22)], %8247:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=50)]) -> (%9299:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=756)], %9279:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=744)], %9281:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=746)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=756, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%9256:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %9299:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=756)]) -> (%9300:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=757, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=758, solved=0))] (%9300:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%9301:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=757)]) + graph.CallGraphOp @model.layers.19.mlp (%9301:tensor<[1, 32, 2048], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=757)]) -> (%9307:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=765)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=765, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%9300:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %9307:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=765)]) -> (%9308:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + cf.ReturnOp (%9308:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %9279:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=744)], %9281:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: 
UInt8, scale_type: Float32), uuid=746)]) -> () } } graph.SubGraphOp @model.layers.19.self_attn [using_qnn:true, symbol:model.layers.19.self_attn] { - (%8855:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=712)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8053:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=22)], %8054:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=50)]) -> (%8887:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=736)], %8868:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=724)], %8870:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=726)]) { - linalg.CPU.LinearOp (%8855:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=712)]) -> (%8856:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=718)]) - linalg.CPU.LinearOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=712), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=715), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=714))] (%8855:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=712)]) -> (%8857:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=715)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=712), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=717), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=716))] (%8855:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=712)]) -> (%8858:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=717)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=718), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=718), )] (%8856:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=718)]) -> (%8856:tensor<[1, 32, 16, 128], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=718)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=718), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=718), )] (%8856:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=718)]) -> (%8859:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=718)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=715), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=715), )] (%8857:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=715)]) -> (%8857:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=715)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=715), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=715), )] (%8857:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=715)]) -> (%8860:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=715)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=717), 
outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=717), )] (%8858:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=717)]) -> (%8858:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=717)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=717), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=717), )] (%8858:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=717)]) -> (%8861:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=717)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=718), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=719), weight_weight:QuantSpec(Raw(type: Int16PerTensor), uuid=720))] (%8859:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=718)]) -> (%8862:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=719)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=715), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=721), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=722))] (%8860:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=715)]) -> (%8863:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=721)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=719), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=719), )] (%8862:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=719)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%8864:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=719)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=721), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=721), )] (%8863:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=721)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%8865:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=721)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=721), outputs_0:QuantSpec(Raw(type: Float16), uuid=723), )] (%8865:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=721)]) -> (%8866:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=723)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=723), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=724), )] (%8866:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=723)]) -> (%8867:tensor<[1, 8, 32, 128], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=724)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=724), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=724), )] (%8867:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=724)]) -> (%8868:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=724)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=717), outputs_0:QuantSpec(Raw(type: Float16), uuid=725), )] (%8861:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=717)]) -> (%8869:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=725)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=725), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=726), )] (%8869:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=725)]) -> (%8870:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=726)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=22), 
inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=724), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=22), )] (%8053:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=22)], %8868:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=724)]) -> (%8871:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=22)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=50), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=726), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=50), )] (%8054:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=50)], %8870:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=726)]) -> (%8872:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=50)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=22), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=22), )] 
(%8871:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=22)]) -> (%8873:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=22)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=50), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=50), )] (%8872:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=50)]) -> (%8874:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=50)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=719), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=22), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=727), )] (%8864:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=719)], %8873:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=22)]) -> (%8875:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=727)]) - linalg.CPU.MulOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=727), inputs_1:QuantSpec(Raw(type: Float32), uuid=728), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=727), )] (%8875:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=727)], %8876:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=728), constant:[0.088388346]]) -> (%8877:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=727)]) - linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=727), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=729), )] (%8877:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=727)]) -> (%8878:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=729)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=729), inputs_1:QuantSpec(Raw(type: Int16), uuid=730), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=729), )] (%8878:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: 
Float32), uuid=729)], %8879:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=730), constant:[-20]]) -> (%8880:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=729)]) - linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=731), outputs_0:QuantSpec(Raw(type: UInt8), uuid=732), )] (%8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8881:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=731), constant:[0.55078125]]) -> (%8882:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=732)]) - linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=732), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=727), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=729), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=729), )] (%8882:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=732)], %8877:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=727)], %8880:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=729)]) -> (%8883:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=729)]) - linalg.CPU.SoftmaxOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=729), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=733), )] (%8883:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=729)]) -> (%8884:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=733)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=733), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=50), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=734), )] (%8884:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=733)], %8874:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=50)]) -> (%8885:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=734)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=734), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=734), )] (%8885:tensor<[1, 16, 32, 128], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=734)]) -> (%8886:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=734)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=734), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=734), )] (%8886:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=734)]) -> (%8886:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=734)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=734), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=736), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=735))] (%8886:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=734)]) -> (%8887:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=736)]) - cf.ReturnOp (%8887:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=736)], %8868:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=724)], %8870:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=726)]) -> () + (%9257:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=731)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8246:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=22)], %8247:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=50)]) -> (%9299:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=756)], %9279:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=744)], %9281:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=746)]) { + linalg.CPU.LinearOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=731, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=734, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=733, solved=0))] (%9257:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=731)]) -> (%9258:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=734)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=731, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=736, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=735, solved=0))] (%9257:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=731)]) -> (%9259:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=736)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, 
quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=731, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=738, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=737, solved=0))] (%9257:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=731)]) -> (%9260:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=738)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=734, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=734, solved=0), )] (%9258:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=734)]) -> (%9258:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=734)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=734, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=734, solved=0), )] (%9258:tensor<[1, 32, 16, 
128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=734)]) -> (%9261:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=734)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=736, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=736, solved=0), )] (%9259:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=736)]) -> (%9259:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=736)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=736, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=736, solved=0), )] (%9259:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=736)]) -> (%9262:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=736)]) + linalg.CPU.ViewOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=738, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=738, solved=0), )] (%9260:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=738)]) -> (%9260:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=738)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=738, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=738, solved=0), )] (%9260:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=738)]) -> (%9263:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=738)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=734, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=739, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: 
Float32, zero_point_type: Int32), uuid=740, solved=0))] (%9261:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=734)]) -> (%9264:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=739)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=736, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=741, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=742, solved=0))] (%9262:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=736)]) -> (%9265:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=741)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), )] (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, 
quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), )] (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=739, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=739, solved=0), )] (%9264:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=739)]) -> (%9264:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=739)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=739, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=739, solved=0), )] (%9264:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 
65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=739)]) -> (%9264:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=739)]) + linalg.CPU.NegOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=739, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=739, solved=0), )] (%9264:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=739)]) -> (%9266:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=739)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=739, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=739, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=739, solved=0), )] (%9266:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=739)], %9264:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=739)]) -> 
(%9267:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=739)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=739, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=739, solved=0), )] (%9267:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=739)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%9268:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=739)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=739, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=739, solved=0), )] (%9264:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=739)], %8268:tensor<[1, 1, 
32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%9269:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=739)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=739, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=739, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=739, solved=0), )] (%9269:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=739)], %9268:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=739)]) -> (%9270:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=739)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=741, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=741, solved=0), )] (%9265:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: 
UInt16, scale_type: Float32, zero_point_type: Int32), uuid=741)]) -> (%9265:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=741)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=741, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=741, solved=0), )] (%9265:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=741)]) -> (%9265:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=741)]) + linalg.CPU.NegOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=741, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=741, solved=0), )] (%9265:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=741)]) -> (%9271:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=741)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: 
Int32), uuid=741, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=741, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=741, solved=0), )] (%9271:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=741)], %9265:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=741)]) -> (%9272:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=741)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=741, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=741, solved=0), )] (%9272:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=741)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%9273:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=741)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=741, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=741, solved=0), )] (%9265:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=741)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%9274:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=741)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=741, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=741, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=741, solved=0), )] (%9274:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=741)], %9273:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=741)]) -> (%9275:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=741)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=741, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=743, solved=0), )] (%9275:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=741)]) -> (%9276:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=743)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=743, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=744, solved=0), )] (%9276:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=743)]) -> (%9277:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=744)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=744, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=744, solved=0), )] (%9277:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=744)]) -> (%9279:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), 
uuid=744)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=738, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=745, solved=0), )] (%9263:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=738)]) -> (%9280:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=745)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=745, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=746, solved=0), )] (%9280:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=745)]) -> (%9281:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=746)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=22, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=744, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=22, solved=0), )] (%8246:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=22)], %9279:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=744)]) -> (%9283:tensor<[1, 8, 128, 1024], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=22)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=50, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=746, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=50, solved=0), )] (%8247:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=50)], %9281:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=746)]) -> (%9284:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=50)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=22, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=22, solved=0), )] (%9283:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=22)]) -> (%9285:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=22)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=50, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, 
quant_to_type: UInt8, scale_type: Float32), uuid=50, solved=0), )] (%9284:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=50)]) -> (%9286:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=50)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=739, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=22, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=747, solved=0), )] (%9270:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=739)], %9285:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=22)]) -> (%9287:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=747)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=747, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=748, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), 
uuid=747, solved=0), )] (%9287:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=747)], %9288:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=748), constant:[0.088388346]]) -> (%9289:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=747)]) + linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=747, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=749, solved=0), )] (%9289:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=747)]) -> (%9290:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=749)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=749, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=750, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=749, solved=0), )] (%9290:tensor<[1, 16, 32, 1], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=749)], %9291:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=750), constant:[-20]]) -> (%9292:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=749)]) + linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2, solved=0), inputs_1:QuantSpec(Raw(type: UInt16), uuid=751, solved=0), outputs_0:QuantSpec(Raw(type: UInt8), uuid=752, solved=0), )] (%8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %9293:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=751), constant:[0]]) -> (%9294:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=752)]) + linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=752, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=747, solved=0), inputs_2:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=749, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=749, solved=0), )] (%9294:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=752)], %9289:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=747)], %9292:tensor<[1, 16, 32, 1], 
UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=749)]) -> (%9295:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=749)]) + linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=749, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=753, solved=0), )] (%9295:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=749)]) -> (%9296:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=753)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=753, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=50, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=754, solved=0), )] (%9296:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=753)], %9286:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, 
scale_type: Float32), uuid=50)]) -> (%9297:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=754)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=754, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=754, solved=0), )] (%9297:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=754)]) -> (%9298:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=754)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=754, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=754, solved=0), )] (%9298:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=754)]) -> (%9298:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=754)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=754, solved=0), 
outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=756, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=755, solved=0))] (%9298:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=754)]) -> (%9299:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=756)]) + cf.ReturnOp (%9299:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=756)], %9279:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=744)], %9281:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=746)]) -> () } } graph.SubGraphOp @model.layers.19.mlp [using_qnn:true, symbol:model.layers.19.mlp] { - (%8889:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=737)]) -> (%8894:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=745)]) { - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=737), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=740), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=739))] (%8889:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=737)]) -> (%8890:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=740)]) - linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=740), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=741), )] (%8890:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=740)]) -> (%8891:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=741)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=737), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=743), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=742))] (%8889:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=737)]) -> (%8892:tensor<[1, 32, 6144], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=743)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=741), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=743), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=741), )] (%8891:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=741)], %8892:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=743)]) -> (%8893:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=741)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=741), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=745), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=744))] (%8893:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=741)]) -> (%8894:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=745)]) - cf.ReturnOp (%8894:tensor<[1, 32, 2048], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=745)]) -> () + (%9301:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=757)]) -> (%9307:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=765)]) { + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=757, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=760, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=759, solved=0))] (%9301:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=757)]) -> (%9302:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=760)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=757, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=762, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, 
scale_1_type: Float32), uuid=761, solved=0))] (%9301:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=757)]) -> (%9303:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=762)]) + linalg.CPU.SigmoidOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=762, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=763, solved=0), )] (%9303:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=762)]) -> (%9304:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=763)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=762, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=763, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=762, solved=0), )] (%9303:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=762)], %9304:tensor<[1, 32, 6144], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=763)]) -> (%9305:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=762)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=762, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=760, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=762, solved=0), )] (%9305:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=762)], %9302:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=760)]) -> (%9306:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=762)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=762, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=765, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: 
Float32), uuid=764, solved=0))] (%9306:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=762)]) -> (%9307:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=765)]) + cf.ReturnOp (%9307:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=765)]) -> () } } graph.SubGraphOp @model.layers.20 [using_qnn:true, symbol:model.layers.20] { - (%8895:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=745)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8055:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=23)], %8056:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=51)]) -> (%8936:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=779)], %8909:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 
-128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=758)], %8911:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=760)]) { - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=745), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=746), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=747))] (%8895:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=745)]) -> (%8896:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=746)]) - graph.CallGraphOp @model.layers.20.self_attn (%8896:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=746)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8055:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=23)], %8056:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, 
quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=51)]) -> (%8928:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=770)], %8909:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=758)], %8911:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=760)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=770), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=745), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=770), )] (%8928:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=770)], %8895:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=745)]) -> (%8929:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=770)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=770), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=771), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=772))] 
(%8929:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=770)]) -> (%8930:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=771)]) - graph.CallGraphOp @model.layers.20.mlp (%8930:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=771)]) -> (%8935:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=779)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=779), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=770), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=779), )] (%8935:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=779)], %8929:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=770)]) -> (%8936:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=779)]) - cf.ReturnOp (%8936:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=779)], %8909:tensor<[1, 8, 128, 32], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=758)], %8911:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=760)]) -> () + (%9308:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8248:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=23)], %8249:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=51)]) -> (%9360:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %9331:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=779)], %9333:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=781)]) { + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, 
scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=766, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=767, solved=0))] (%9308:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%9309:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=766)]) + graph.CallGraphOp @model.layers.20.self_attn (%9309:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=766)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8248:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=23)], %8249:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=51)]) -> (%9351:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=791)], %9331:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=779)], %9333:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=781)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=791, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%9308:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %9351:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=791)]) -> (%9352:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=792, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: 
Int32), uuid=793, solved=0))] (%9352:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%9353:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=792)]) + graph.CallGraphOp @model.layers.20.mlp (%9353:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=792)]) -> (%9359:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=800)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=800, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%9352:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %9359:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=800)]) -> (%9360:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) 
+ cf.ReturnOp (%9360:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %9331:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=779)], %9333:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=781)]) -> () } } graph.SubGraphOp @model.layers.20.self_attn [using_qnn:true, symbol:model.layers.20.self_attn] { - (%8896:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=746)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8055:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=23)], %8056:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=51)]) -> (%8928:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=770)], %8909:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=758)], 
%8911:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=760)]) { - linalg.CPU.LinearOp (%8896:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=746)]) -> (%8897:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=752)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=746), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=749), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=748))] (%8896:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=746)]) -> (%8898:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=749)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=746), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=751), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=750))] (%8896:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=746)]) -> (%8899:tensor<[1, 
32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=751)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=752), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=752), )] (%8897:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=752)]) -> (%8897:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=752)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=752), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=752), )] (%8897:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=752)]) -> (%8900:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=752)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=749), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=749), )] (%8898:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=749)]) -> (%8898:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=749)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=749), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=749), )] (%8898:tensor<[1, 32, 8, 128], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=749)]) -> (%8901:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=749)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=751), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=751), )] (%8899:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=751)]) -> (%8899:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=751)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=751), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=751), )] (%8899:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=751)]) -> (%8902:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=751)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=752), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=753), weight_weight:QuantSpec(Raw(type: Int16PerTensor), uuid=754))] (%8900:tensor<[1, 16, 32, 128], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=752)]) -> (%8903:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=753)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=749), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=755), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=756))] (%8901:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=749)]) -> (%8904:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=755)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=753), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=753), )] (%8903:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=753)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%8905:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=753)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=755), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=755), )] (%8904:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=755)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%8906:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=755)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=755), outputs_0:QuantSpec(Raw(type: Float16), uuid=757), )] (%8906:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=755)]) -> (%8907:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=757)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=757), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=758), )] (%8907:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=757)]) -> (%8908:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=758)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=758), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=758), )] (%8908:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=758)]) -> (%8909:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=758)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=751), outputs_0:QuantSpec(Raw(type: Float16), uuid=759), )] (%8902:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=751)]) -> (%8910:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=759)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=759), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), 
uuid=760), )] (%8910:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=759)]) -> (%8911:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=760)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=23), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=758), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=23), )] (%8055:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=23)], %8909:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=758)]) -> (%8912:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=23)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=51), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=760), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=51), )] (%8056:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=51)], %8911:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=760)]) -> 
(%8913:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=51)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=23), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=23), )] (%8912:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=23)]) -> (%8914:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=23)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=51), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=51), )] (%8913:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=51)]) -> (%8915:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=51)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=753), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=23), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=761), )] (%8905:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 
-32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=753)], %8914:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=23)]) -> (%8916:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=761)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=761), inputs_1:QuantSpec(Raw(type: Float32), uuid=762), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=761), )] (%8916:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=761)], %8917:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=762), constant:[0.088388346]]) -> (%8918:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=761)]) - linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=761), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=763), )] (%8918:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=761)]) -> (%8919:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=763)]) - linalg.CPU.AddOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=763), inputs_1:QuantSpec(Raw(type: Int16), uuid=764), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=763), )] (%8919:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=763)], %8920:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=764), constant:[-20]]) -> (%8921:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=763)]) - linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=765), outputs_0:QuantSpec(Raw(type: UInt8), uuid=766), )] (%8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8922:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=765), constant:[0.71875]]) -> (%8923:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=766)]) - linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=766), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=761), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=763), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=763), )] (%8923:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=766)], %8918:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=761)], %8921:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=763)]) -> (%8924:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=763)]) - linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=763), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=767), )] (%8924:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=763)]) -> (%8925:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=767)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=767), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=51), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=768), )] (%8925:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=767)], %8915:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=51)]) -> (%8926:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 
32767, quant_to_type: Int16, scale_type: Float32), uuid=768)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=768), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=768), )] (%8926:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=768)]) -> (%8927:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=768)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=768), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=768), )] (%8927:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=768)]) -> (%8927:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=768)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=768), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=770), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=769))] (%8927:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=768)]) -> (%8928:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=770)]) - cf.ReturnOp (%8928:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=770)], %8909:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=758)], %8911:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=760)]) -> () + (%9309:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=766)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8248:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=23)], %8249:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=51)]) -> (%9351:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=791)], 
%9331:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=779)], %9333:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=781)]) { + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=766, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=769, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=768, solved=0))] (%9309:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=766)]) -> (%9310:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=769)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=766, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=771, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=770, solved=0))] (%9309:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: 
Float32, zero_point_type: Int32), uuid=766)]) -> (%9311:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=771)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=766, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=773, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=772, solved=0))] (%9309:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=766)]) -> (%9312:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=773)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=769, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=769, solved=0), )] (%9310:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=769)]) -> (%9310:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=769)]) + linalg.CPU.TransposeOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=769, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=769, solved=0), )] (%9310:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=769)]) -> (%9313:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=769)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=771, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=771, solved=0), )] (%9311:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=771)]) -> (%9311:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=771)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=771, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=771, solved=0), )] (%9311:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, 
quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=771)]) -> (%9314:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=771)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=773, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=773, solved=0), )] (%9312:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=773)]) -> (%9312:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=773)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=773, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=773, solved=0), )] (%9312:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=773)]) -> (%9315:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=773)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, 
scale_type: Float32, zero_point_type: Int32), uuid=769, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=774, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=775, solved=0))] (%9313:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=769)]) -> (%9316:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=774)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=771, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=776, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=777, solved=0))] (%9314:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=771)]) -> (%9317:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=776)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, 
scale_type: Float32), uuid=65, solved=0), )] (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), )] (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=774, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=774, solved=0), )] (%9316:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=774)]) -> (%9316:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=774)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: 
Float32, zero_point_type: Int32), uuid=774, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=774, solved=0), )] (%9316:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=774)]) -> (%9316:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=774)]) + linalg.CPU.NegOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=774, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=774, solved=0), )] (%9316:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=774)]) -> (%9318:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=774)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=774, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=774, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=774, solved=0), )] (%9318:tensor<[1, 16, 32, 64], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=774)], %9316:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=774)]) -> (%9319:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=774)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=774, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=774, solved=0), )] (%9319:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=774)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%9320:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=774)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=774, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), 
outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=774, solved=0), )] (%9316:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=774)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%9321:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=774)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=774, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=774, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=774, solved=0), )] (%9321:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=774)], %9320:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=774)]) -> (%9322:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=774)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, 
quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=776, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=776, solved=0), )] (%9317:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=776)]) -> (%9317:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=776)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=776, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=776, solved=0), )] (%9317:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=776)]) -> (%9317:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=776)]) + linalg.CPU.NegOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=776, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=776, solved=0), )] (%9317:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=776)]) 
-> (%9323:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=776)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=776, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=776, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=776, solved=0), )] (%9323:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=776)], %9317:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=776)]) -> (%9324:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=776)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=776, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=776, solved=0), )] (%9324:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=776)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%9325:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=776)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=776, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=776, solved=0), )] (%9317:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=776)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%9326:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=776)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=776, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=776, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=776, 
solved=0), )] (%9326:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=776)], %9325:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=776)]) -> (%9327:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=776)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=776, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=778, solved=0), )] (%9327:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=776)]) -> (%9328:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=778)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=778, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=779, solved=0), )] (%9328:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=778)]) -> (%9329:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=779)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=779, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, 
scale_type: Float32), uuid=779, solved=0), )] (%9329:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=779)]) -> (%9331:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=779)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=773, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=780, solved=0), )] (%9315:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=773)]) -> (%9332:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=780)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=780, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=781, solved=0), )] (%9332:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=780)]) -> (%9333:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=781)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=23, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=779, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=23, solved=0), )] (%8248:tensor<[1, 8, 128, 992], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=23)], %9331:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=779)]) -> (%9335:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=23)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=51, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=781, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=51, solved=0), )] (%8249:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=51)], %9333:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=781)]) -> (%9336:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=51)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=23, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=23, solved=0), )] (%9335:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=23)]) -> (%9337:tensor<[1, 16, 128, 1024], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=23)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=51, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=51, solved=0), )] (%9336:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=51)]) -> (%9338:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=51)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=774, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=23, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=782, solved=0), )] (%9322:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=774)], %9337:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=23)]) -> (%9339:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=782)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, 
quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=782, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=783, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=782, solved=0), )] (%9339:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=782)], %9340:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=783), constant:[0.088388346]]) -> (%9341:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=782)]) + linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=782, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=784, solved=0), )] (%9341:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=782)]) -> (%9342:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=784)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=784, 
solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=785, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=784, solved=0), )] (%9342:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=784)], %9343:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=785), constant:[-20]]) -> (%9344:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=784)]) + linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2, solved=0), inputs_1:QuantSpec(Raw(type: UInt16), uuid=786, solved=0), outputs_0:QuantSpec(Raw(type: UInt8), uuid=787, solved=0), )] (%8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %9345:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=786), constant:[0]]) -> (%9346:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=787)]) + linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=787, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=782, solved=0), inputs_2:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=784, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=784, solved=0), )] (%9346:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=787)], %9341:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=782)], %9344:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=784)]) -> (%9347:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=784)]) + linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=784, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=788, solved=0), )] (%9347:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=784)]) -> (%9348:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=788)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=788, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=51, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: 
Int32), uuid=789, solved=0), )] (%9348:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=788)], %9338:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=51)]) -> (%9349:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=789)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=789, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=789, solved=0), )] (%9349:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=789)]) -> (%9350:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=789)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=789, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=789, solved=0), )] (%9350:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=789)]) -> (%9350:tensor<[1, 32, 2048], 
UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=789)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=789, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=791, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=790, solved=0))] (%9350:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=789)]) -> (%9351:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=791)]) + cf.ReturnOp (%9351:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=791)], %9331:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=779)], %9333:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=781)]) -> () } } graph.SubGraphOp @model.layers.20.mlp [using_qnn:true, symbol:model.layers.20.mlp] { - (%8930:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=771)]) -> (%8935:tensor<[1, 32, 2048], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=779)]) { - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=771), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=774), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=773))] (%8930:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=771)]) -> (%8931:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=774)]) - linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=774), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=775), )] (%8931:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=774)]) -> (%8932:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=775)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=771), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=777), weight_weight:QuantSpec(LPBQ(quant_min: -8, 
quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=776))] (%8930:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=771)]) -> (%8933:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=777)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=775), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=777), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=775), )] (%8932:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=775)], %8933:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=777)]) -> (%8934:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=775)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=775), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=779), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=778))] (%8934:tensor<[1, 32, 6144], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=775)]) -> (%8935:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=779)]) - cf.ReturnOp (%8935:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=779)]) -> () + (%9353:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=792)]) -> (%9359:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=800)]) { + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=792, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=795, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=794, solved=0))] (%9353:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=792)]) -> (%9354:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=795)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, 
quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=792, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=797, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=796, solved=0))] (%9353:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=792)]) -> (%9355:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=797)]) + linalg.CPU.SigmoidOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=797, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=798, solved=0), )] (%9355:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=797)]) -> (%9356:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=798)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=797, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=798, solved=0), 
outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=797, solved=0), )] (%9355:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=797)], %9356:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=798)]) -> (%9357:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=797)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=797, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=795, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=797, solved=0), )] (%9357:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=797)], %9354:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=795)]) -> (%9358:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=797)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 
65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=797, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=800, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=799, solved=0))] (%9358:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=797)]) -> (%9359:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=800)]) + cf.ReturnOp (%9359:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=800)]) -> () } } graph.SubGraphOp @model.layers.21 [using_qnn:true, symbol:model.layers.21] { - (%8936:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=779)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8057:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=24)], %8058:tensor<[1, 8, 992, 128], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=52)]) -> (%8977:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=813)], %8950:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=792)], %8952:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=794)]) { - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=779), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=780), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=781))] (%8936:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=779)]) -> (%8937:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=780)]) - graph.CallGraphOp @model.layers.21.self_attn (%8937:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=780)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, 
scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8057:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=24)], %8058:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=52)]) -> (%8969:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=804)], %8950:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=792)], %8952:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=794)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=804), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=779), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=804), )] (%8969:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=804)], %8936:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=779)]) -> (%8970:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=804)]) - linalg.CPU.RMSNormOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=804), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=805), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=806))] (%8970:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=804)]) -> (%8971:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=805)]) - graph.CallGraphOp @model.layers.21.mlp (%8971:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=805)]) -> (%8976:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=813)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=813), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=804), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=813), )] (%8976:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=813)], %8970:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=804)]) -> (%8977:tensor<[1, 32, 2048], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=813)]) - cf.ReturnOp (%8977:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=813)], %8950:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=792)], %8952:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=794)]) -> () + (%9360:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8250:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=24)], %8251:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=52)]) -> (%9412:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %9383:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, 
quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=814)], %9385:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=816)]) { + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=801, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=802, solved=0))] (%9360:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%9361:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=801)]) + graph.CallGraphOp @model.layers.21.self_attn (%9361:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=801)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8250:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, 
quant_to_type: Int8, scale_type: Float32), uuid=24)], %8251:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=52)]) -> (%9403:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=826)], %9383:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=814)], %9385:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=816)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=826, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%9360:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %9403:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=826)]) -> (%9404:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, 
quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=827, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=828, solved=0))] (%9404:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%9405:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=827)]) + graph.CallGraphOp @model.layers.21.mlp (%9405:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=827)]) -> (%9411:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=835)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=835, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%9404:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], 
%9411:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=835)]) -> (%9412:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + cf.ReturnOp (%9412:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %9383:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=814)], %9385:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=816)]) -> () } } graph.SubGraphOp @model.layers.21.self_attn [using_qnn:true, symbol:model.layers.21.self_attn] { - (%8937:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=780)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8057:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=24)], %8058:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, 
quant_to_type: Int8, scale_type: Float32), uuid=52)]) -> (%8969:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=804)], %8950:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=792)], %8952:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=794)]) { - linalg.CPU.LinearOp (%8937:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=780)]) -> (%8938:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=786)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=780), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=783), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=782))] (%8937:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=780)]) -> (%8939:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=783)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=780), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: 
Int16, scale_type: Float32), uuid=785), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=784))] (%8937:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=780)]) -> (%8940:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=785)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=786), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=786), )] (%8938:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=786)]) -> (%8938:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=786)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=786), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=786), )] (%8938:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=786)]) -> (%8941:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=786)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=783), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=783), )] (%8939:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=783)]) -> (%8939:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=783)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=783), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=783), )] (%8939:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=783)]) -> (%8942:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=783)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=785), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=785), )] (%8940:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=785)]) -> (%8940:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=785)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=785), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=785), )] (%8940:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=785)]) -> (%8943:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=785)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=786), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=787), weight_weight:QuantSpec(Raw(type: Int16PerTensor), uuid=788))] (%8941:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=786)]) -> (%8944:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=787)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=783), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=789), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=790))] (%8942:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=783)]) -> (%8945:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=789)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=787), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: 
Float32), uuid=787), )] (%8944:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=787)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%8946:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=787)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=789), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=789), )] (%8945:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=789)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%8947:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=789)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=789), outputs_0:QuantSpec(Raw(type: Float16), uuid=791), )] (%8947:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=789)]) -> (%8948:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=791)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=791), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=792), )] (%8948:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=791)]) -> (%8949:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=792)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=792), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=792), )] (%8949:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=792)]) -> (%8950:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=792)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=785), outputs_0:QuantSpec(Raw(type: Float16), uuid=793), )] (%8943:tensor<[1, 8, 32, 128], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=785)]) -> (%8951:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=793)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=793), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=794), )] (%8951:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=793)]) -> (%8952:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=794)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=24), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=792), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=24), )] (%8057:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=24)], %8950:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=792)]) -> (%8953:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=24)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=52), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=794), 
outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=52), )] (%8058:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=52)], %8952:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=794)]) -> (%8954:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=52)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=24), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=24), )] (%8953:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=24)]) -> (%8955:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=24)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=52), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=52), )] (%8954:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=52)]) -> (%8956:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=52)]) - linalg.CPU.MatMulOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=787), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=24), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=795), )] (%8946:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=787)], %8955:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=24)]) -> (%8957:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=795)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=795), inputs_1:QuantSpec(Raw(type: Float32), uuid=796), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=795), )] (%8957:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=795)], %8958:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=796), constant:[0.088388346]]) -> (%8959:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=795)]) - linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=795), outputs_0:QuantSpec(SymPerTensor(quant_min: 
-32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=797), )] (%8959:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=795)]) -> (%8960:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=797)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=797), inputs_1:QuantSpec(Raw(type: Int16), uuid=798), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=797), )] (%8960:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=797)], %8961:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=798), constant:[-20]]) -> (%8962:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=797)]) - linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=799), outputs_0:QuantSpec(Raw(type: UInt8), uuid=800), )] (%8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8963:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=799), constant:[-0.80859375]]) -> (%8964:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=800)]) - linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=800), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=795), 
inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=797), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=797), )] (%8964:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=800)], %8959:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=795)], %8962:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=797)]) -> (%8965:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=797)]) - linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=797), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=801), )] (%8965:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=797)]) -> (%8966:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=801)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=801), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=52), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=802), )] 
(%8966:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=801)], %8956:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=52)]) -> (%8967:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=802)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=802), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=802), )] (%8967:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=802)]) -> (%8968:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=802)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=802), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=802), )] (%8968:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=802)]) -> (%8968:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=802)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 
32767, quant_to_type: Int16, scale_type: Float32), uuid=802), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=804), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=803))] (%8968:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=802)]) -> (%8969:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=804)]) - cf.ReturnOp (%8969:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=804)], %8950:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=792)], %8952:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=794)]) -> () + (%9361:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=801)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8250:tensor<[1, 8, 128, 992], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=24)], %8251:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=52)]) -> (%9403:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=826)], %9383:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=814)], %9385:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=816)]) { + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=801, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=804, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=803, solved=0))] (%9361:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=801)]) -> (%9362:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=804)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=801, 
solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=806, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=805, solved=0))] (%9361:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=801)]) -> (%9363:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=806)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=801, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=808, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=807, solved=0))] (%9361:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=801)]) -> (%9364:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=808)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=804, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, 
scale_type: Float32, zero_point_type: Int32), uuid=804, solved=0), )] (%9362:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=804)]) -> (%9362:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=804)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=804, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=804, solved=0), )] (%9362:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=804)]) -> (%9365:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=804)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=806, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=806, solved=0), )] (%9363:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=806)]) -> (%9363:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=806)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=806, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=806, solved=0), )] (%9363:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=806)]) -> (%9366:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=806)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=808, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=808, solved=0), )] (%9364:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=808)]) -> (%9364:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=808)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=808, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=808, solved=0), )] (%9364:tensor<[1, 32, 8, 128], 
UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=808)]) -> (%9367:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=808)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=804, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=809, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=810, solved=0))] (%9365:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=804)]) -> (%9368:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=809)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=806, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=811, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=812, solved=0))] (%9366:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: 
Float32, zero_point_type: Int32), uuid=806)]) -> (%9369:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=811)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), )] (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), )] (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=809, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=809, solved=0), )] (%9368:tensor<[1, 16, 32, 128], 
UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=809)]) -> (%9368:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=809)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=809, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=809, solved=0), )] (%9368:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=809)]) -> (%9368:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=809)]) + linalg.CPU.NegOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=809, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=809, solved=0), )] (%9368:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=809)]) -> (%9370:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=809)]) + linalg.CPU.ConcatOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=809, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=809, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=809, solved=0), )] (%9370:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=809)], %9368:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=809)]) -> (%9371:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=809)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=809, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=809, solved=0), )] (%9371:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=809)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> 
(%9372:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=809)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=809, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=809, solved=0), )] (%9368:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=809)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%9373:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=809)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=809, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=809, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=809, solved=0), )] (%9373:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), 
uuid=809)], %9372:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=809)]) -> (%9374:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=809)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=811, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=811, solved=0), )] (%9369:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=811)]) -> (%9369:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=811)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=811, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=811, solved=0), )] (%9369:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=811)]) -> (%9369:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=811)]) + linalg.CPU.NegOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=811, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=811, solved=0), )] (%9369:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=811)]) -> (%9375:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=811)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=811, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=811, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=811, solved=0), )] (%9375:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=811)], %9369:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=811)]) -> (%9376:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=811)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, 
quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=811, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=811, solved=0), )] (%9376:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=811)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%9377:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=811)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=811, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=811, solved=0), )] (%9369:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=811)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%9378:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: 
Float32, zero_point_type: Int32), uuid=811)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=811, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=811, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=811, solved=0), )] (%9378:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=811)], %9377:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=811)]) -> (%9379:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=811)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=811, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=813, solved=0), )] (%9379:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=811)]) -> (%9380:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=813)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=813, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=814, solved=0), )] 
(%9380:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=813)]) -> (%9381:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=814)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=814, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=814, solved=0), )] (%9381:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=814)]) -> (%9383:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=814)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=808, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=815, solved=0), )] (%9367:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=808)]) -> (%9384:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=815)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=815, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=816, solved=0), )] (%9384:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=815)]) -> (%9385:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, 
quant_to_type: UInt8, scale_type: Float32), uuid=816)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=24, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=814, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=24, solved=0), )] (%8250:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=24)], %9383:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=814)]) -> (%9387:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=24)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=52, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=816, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=52, solved=0), )] (%8251:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=52)], %9385:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=816)]) -> (%9388:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=52)]) + 
linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=24, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=24, solved=0), )] (%9387:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=24)]) -> (%9389:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=24)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=52, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=52, solved=0), )] (%9388:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=52)]) -> (%9390:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=52)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=809, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=24, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=817, solved=0), )] (%9374:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=809)], %9389:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=24)]) -> (%9391:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=817)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=817, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=818, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=817, solved=0), )] (%9391:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=817)], %9392:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=818), constant:[0.088388346]]) -> (%9393:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=817)]) + linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=817, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=819, solved=0), )] (%9393:tensor<[1, 16, 32, 1024], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=817)]) -> (%9394:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=819)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=819, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=820, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=819, solved=0), )] (%9394:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=819)], %9395:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=820), constant:[-20]]) -> (%9396:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=819)]) + linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2, solved=0), inputs_1:QuantSpec(Raw(type: UInt16), uuid=821, solved=0), outputs_0:QuantSpec(Raw(type: UInt8), uuid=822, solved=0), )] (%8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %9397:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=821), constant:[0]]) -> (%9398:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), 
uuid=822)]) + linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=822, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=817, solved=0), inputs_2:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=819, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=819, solved=0), )] (%9398:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=822)], %9393:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=817)], %9396:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=819)]) -> (%9399:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=819)]) + linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=819, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=823, solved=0), )] (%9399:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=819)]) -> (%9400:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, 
quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=823)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=823, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=52, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=824, solved=0), )] (%9400:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=823)], %9390:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=52)]) -> (%9401:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=824)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=824, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=824, solved=0), )] (%9401:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=824)]) -> (%9402:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=824)]) + linalg.CPU.ViewOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=824, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=824, solved=0), )] (%9402:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=824)]) -> (%9402:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=824)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=824, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=826, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=825, solved=0))] (%9402:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=824)]) -> (%9403:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=826)]) + cf.ReturnOp (%9403:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=826)], %9383:tensor<[1, 8, 128, 32], UInt8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=814)], %9385:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=816)]) -> () } } graph.SubGraphOp @model.layers.21.mlp [using_qnn:true, symbol:model.layers.21.mlp] { - (%8971:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=805)]) -> (%8976:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=813)]) { - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=805), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=808), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=807))] (%8971:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=805)]) -> (%8972:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=808)]) - linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=808), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=809), )] (%8972:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 
-32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=808)]) -> (%8973:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=809)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=805), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=811), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=810))] (%8971:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=805)]) -> (%8974:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=811)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=809), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=811), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=809), )] (%8973:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=809)], %8974:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=811)]) -> (%8975:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 
32767, quant_to_type: Int16, scale_type: Float32), uuid=809)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=809), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=813), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=812))] (%8975:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=809)]) -> (%8976:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=813)]) - cf.ReturnOp (%8976:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=813)]) -> () + (%9405:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=827)]) -> (%9411:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=835)]) { + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=827, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=830, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: 
UInt4, scale_1_type: Float32), uuid=829, solved=0))] (%9405:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=827)]) -> (%9406:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=830)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=827, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=832, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=831, solved=0))] (%9405:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=827)]) -> (%9407:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=832)]) + linalg.CPU.SigmoidOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=832, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=833, solved=0), )] (%9407:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=832)]) -> (%9408:tensor<[1, 
32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=833)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=832, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=833, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=832, solved=0), )] (%9407:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=832)], %9408:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=833)]) -> (%9409:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=832)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=832, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=830, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=832, solved=0), )] (%9409:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=832)], %9406:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=830)]) -> (%9410:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=832)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=832, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=835, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=834, solved=0))] (%9410:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=832)]) -> (%9411:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=835)]) + cf.ReturnOp (%9411:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=835)]) -> () } } graph.SubGraphOp @model.layers.22 [using_qnn:true, symbol:model.layers.22] { - (%8977:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=813)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8059:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=25)], %8060:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=53)]) -> (%9018:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=847)], %8991:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=826)], %8993:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=828)]) { - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=813), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=814), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=815))] (%8977:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=813)]) -> (%8978:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=814)]) 
- graph.CallGraphOp @model.layers.22.self_attn (%8978:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=814)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8059:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=25)], %8060:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=53)]) -> (%9010:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=838)], %8991:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=826)], %8993:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=828)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=838), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=813), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=838), )] (%9010:tensor<[1, 32, 
2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=838)], %8977:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=813)]) -> (%9011:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=838)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=838), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=839), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=840))] (%9011:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=838)]) -> (%9012:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=839)]) - graph.CallGraphOp @model.layers.22.mlp (%9012:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=839)]) -> (%9017:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=847)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=847), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=838), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=847), )] (%9017:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=847)], %9011:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=838)]) -> (%9018:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=847)]) - cf.ReturnOp (%9018:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=847)], %8991:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=826)], %8993:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=828)]) -> () + (%9412:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8252:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, 
quant_to_type: Int8, scale_type: Float32), uuid=25)], %8253:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=53)]) -> (%9464:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %9435:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=849)], %9437:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=851)]) { + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=836, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=837, solved=0))] (%9412:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%9413:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=836)]) + graph.CallGraphOp @model.layers.22.self_attn (%9413:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=836)], %8267:tensor<[1, 1, 32, 128], 
UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8252:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=25)], %8253:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=53)]) -> (%9455:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=861)], %9435:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=849)], %9437:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=851)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=861, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%9412:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=60)], %9455:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=861)]) -> (%9456:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=862, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=863, solved=0))] (%9456:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%9457:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=862)]) + graph.CallGraphOp @model.layers.22.mlp (%9457:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=862)]) -> (%9463:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=870)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), 
uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=870, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%9456:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %9463:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=870)]) -> (%9464:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + cf.ReturnOp (%9464:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %9435:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=849)], %9437:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=851)]) -> () } } graph.SubGraphOp @model.layers.22.self_attn [using_qnn:true, symbol:model.layers.22.self_attn] { - (%8978:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=814)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8059:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=25)], %8060:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=53)]) -> (%9010:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=838)], %8991:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=826)], %8993:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=828)]) { - linalg.CPU.LinearOp (%8978:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=814)]) -> (%8979:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=820)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=814), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=817), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=816))] (%8978:tensor<[1, 32, 2048], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=814)]) -> (%8980:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=817)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=814), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=819), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=818))] (%8978:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=814)]) -> (%8981:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=819)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=820), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=820), )] (%8979:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=820)]) -> (%8979:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=820)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=820), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=820), )] (%8979:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=820)]) -> (%8982:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=820)]) - linalg.CPU.ViewOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=817), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=817), )] (%8980:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=817)]) -> (%8980:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=817)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=817), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=817), )] (%8980:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=817)]) -> (%8983:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=817)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=819), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=819), )] (%8981:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=819)]) -> (%8981:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=819)]) - 
linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=819), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=819), )] (%8981:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=819)]) -> (%8984:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=819)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=820), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=821), weight_weight:QuantSpec(Raw(type: Int16PerTensor), uuid=822))] (%8982:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=820)]) -> (%8985:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=821)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=817), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=823), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=824))] (%8983:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=817)]) -> (%8986:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=823)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=821), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=821), )] (%8985:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=821)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%8987:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=821)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=823), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=823), )] (%8986:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=823)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%8988:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=823)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=823), outputs_0:QuantSpec(Raw(type: Float16), uuid=825), )] (%8988:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=823)]) -> (%8989:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=825)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=825), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=826), )] (%8989:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=825)]) -> (%8990:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=826)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=826), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=826), )] (%8990:tensor<[1, 8, 32, 128], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=826)]) -> (%8991:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=826)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=819), outputs_0:QuantSpec(Raw(type: Float16), uuid=827), )] (%8984:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=819)]) -> (%8992:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=827)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=827), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=828), )] (%8992:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=827)]) -> (%8993:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=828)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=25), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=826), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=25), )] (%8059:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=25)], %8991:tensor<[1, 8, 128, 32], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=826)]) -> (%8994:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=25)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=53), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=828), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=53), )] (%8060:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=53)], %8993:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=828)]) -> (%8995:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=53)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=25), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=25), )] (%8994:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=25)]) -> (%8996:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=25)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, 
quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=53), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=53), )] (%8995:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=53)]) -> (%8997:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=53)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=821), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=25), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=829), )] (%8987:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=821)], %8996:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=25)]) -> (%8998:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=829)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=829), inputs_1:QuantSpec(Raw(type: Float32), uuid=830), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=829), )] (%8998:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=829)], %8999:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=830), constant:[0.088388346]]) -> (%9000:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=829)]) - linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=829), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=831), )] (%9000:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=829)]) -> (%9001:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=831)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=831), inputs_1:QuantSpec(Raw(type: Int16), uuid=832), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=831), )] (%9001:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=831)], %9002:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=832), constant:[-20]]) -> (%9003:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=831)]) - linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=833), 
outputs_0:QuantSpec(Raw(type: UInt8), uuid=834), )] (%8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %9004:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=833), constant:[-0.42773438]]) -> (%9005:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=834)]) - linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=834), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=829), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=831), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=831), )] (%9005:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=834)], %9000:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=829)], %9003:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=831)]) -> (%9006:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=831)]) - linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=831), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=835), )] (%9006:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=831)]) -> (%9007:tensor<[1, 16, 32, 
1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=835)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=835), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=53), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=836), )] (%9007:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=835)], %8997:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=53)]) -> (%9008:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=836)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=836), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=836), )] (%9008:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=836)]) -> (%9009:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=836)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=836), 
outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=836), )] (%9009:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=836)]) -> (%9009:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=836)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=836), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=838), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=837))] (%9009:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=836)]) -> (%9010:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=838)]) - cf.ReturnOp (%9010:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=838)], %8991:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=826)], %8993:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=828)]) -> () + (%9413:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, 
quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=836)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8252:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=25)], %8253:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=53)]) -> (%9455:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=861)], %9435:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=849)], %9437:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=851)]) { + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=836, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=839, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=838, solved=0))] (%9413:tensor<[1, 32, 
2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=836)]) -> (%9414:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=839)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=836, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=841, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=840, solved=0))] (%9413:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=836)]) -> (%9415:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=841)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=836, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=843, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=842, solved=0))] (%9413:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, 
quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=836)]) -> (%9416:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=843)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=839, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=839, solved=0), )] (%9414:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=839)]) -> (%9414:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=839)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=839, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=839, solved=0), )] (%9414:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=839)]) -> (%9417:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=839)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=841, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=841, solved=0), )] (%9415:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=841)]) -> (%9415:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=841)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=841, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=841, solved=0), )] (%9415:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=841)]) -> (%9418:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=841)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=843, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=843, solved=0), )] (%9416:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=843)]) -> (%9416:tensor<[1, 32, 8, 128], 
UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=843)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=843, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=843, solved=0), )] (%9416:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=843)]) -> (%9419:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=843)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=839, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=844, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=845, solved=0))] (%9417:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=839)]) -> (%9420:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=844)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 
65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=841, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=846, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=847, solved=0))] (%9418:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=841)]) -> (%9421:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=846)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), )] (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), )] (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> 
(%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=844, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=844, solved=0), )] (%9420:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=844)]) -> (%9420:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=844)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=844, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=844, solved=0), )] (%9420:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=844)]) -> (%9420:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=844)]) + linalg.CPU.NegOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=844, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, 
quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=844, solved=0), )] (%9420:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=844)]) -> (%9422:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=844)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=844, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=844, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=844, solved=0), )] (%9422:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=844)], %9420:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=844)]) -> (%9423:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=844)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=844, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), 
outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=844, solved=0), )] (%9423:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=844)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%9424:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=844)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=844, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=844, solved=0), )] (%9420:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=844)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%9425:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=844)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: 
Int32), uuid=844, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=844, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=844, solved=0), )] (%9425:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=844)], %9424:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=844)]) -> (%9426:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=844)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=846, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=846, solved=0), )] (%9421:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=846)]) -> (%9421:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=846)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=846, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 
65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=846, solved=0), )] (%9421:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=846)]) -> (%9421:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=846)]) + linalg.CPU.NegOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=846, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=846, solved=0), )] (%9421:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=846)]) -> (%9427:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=846)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=846, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=846, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=846, solved=0), )] (%9427:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), 
uuid=846)], %9421:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=846)]) -> (%9428:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=846)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=846, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=846, solved=0), )] (%9428:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=846)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%9429:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=846)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=846, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=846, solved=0), )] 
(%9421:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=846)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%9430:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=846)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=846, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=846, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=846, solved=0), )] (%9430:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=846)], %9429:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=846)]) -> (%9431:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=846)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=846, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=848, solved=0), )] 
(%9431:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=846)]) -> (%9432:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=848)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=848, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=849, solved=0), )] (%9432:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=848)]) -> (%9433:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=849)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=849, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=849, solved=0), )] (%9433:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=849)]) -> (%9435:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=849)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=843, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=850, solved=0), )] (%9419:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=843)]) -> (%9436:tensor<[1, 8, 32, 
128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=850)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=850, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=851, solved=0), )] (%9436:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=850)]) -> (%9437:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=851)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=25, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=849, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=25, solved=0), )] (%8252:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=25)], %9435:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=849)]) -> (%9439:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=25)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=53, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=851, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=53, solved=0), )] 
(%8253:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=53)], %9437:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=851)]) -> (%9440:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=53)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=25, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=25, solved=0), )] (%9439:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=25)]) -> (%9441:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=25)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=53, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=53, solved=0), )] (%9440:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=53)]) -> (%9442:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=53)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, 
scale_type: Float32, zero_point_type: Int32), uuid=844, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=25, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=852, solved=0), )] (%9426:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=844)], %9441:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=25)]) -> (%9443:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=852)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=852, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=853, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=852, solved=0), )] (%9443:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=852)], %9444:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=853), constant:[0.088388346]]) -> (%9445:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, 
quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=852)]) + linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=852, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=854, solved=0), )] (%9445:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=852)]) -> (%9446:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=854)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=854, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=855, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=854, solved=0), )] (%9446:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=854)], %9447:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=855), constant:[-20]]) -> (%9448:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=854)]) + 
linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2, solved=0), inputs_1:QuantSpec(Raw(type: UInt16), uuid=856, solved=0), outputs_0:QuantSpec(Raw(type: UInt8), uuid=857, solved=0), )] (%8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %9449:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=856), constant:[0]]) -> (%9450:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=857)]) + linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=857, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=852, solved=0), inputs_2:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=854, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=854, solved=0), )] (%9450:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=857)], %9445:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=852)], %9448:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=854)]) -> (%9451:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=854)]) + linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), 
uuid=854, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=858, solved=0), )] (%9451:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=854)]) -> (%9452:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=858)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=858, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=53, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=859, solved=0), )] (%9452:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=858)], %9442:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=53)]) -> (%9453:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=859)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=859, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=859, solved=0), )] (%9453:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=859)]) -> (%9454:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=859)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=859, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=859, solved=0), )] (%9454:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=859)]) -> (%9454:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=859)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=859, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=861, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=860, solved=0))] (%9454:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=859)]) -> (%9455:tensor<[1, 
32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=861)]) + cf.ReturnOp (%9455:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=861)], %9435:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=849)], %9437:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=851)]) -> () } } graph.SubGraphOp @model.layers.22.mlp [using_qnn:true, symbol:model.layers.22.mlp] { - (%9012:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=839)]) -> (%9017:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=847)]) { - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=839), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=842), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=841))] (%9012:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=839)]) -> (%9013:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: 
Int16, scale_type: Float32), uuid=842)]) - linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=842), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=843), )] (%9013:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=842)]) -> (%9014:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=843)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=839), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=845), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=844))] (%9012:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=839)]) -> (%9015:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=845)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=843), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=845), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=843), )] (%9014:tensor<[1, 32, 6144], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=843)], %9015:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=845)]) -> (%9016:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=843)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=843), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=847), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=846))] (%9016:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=843)]) -> (%9017:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=847)]) - cf.ReturnOp (%9017:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=847)]) -> () + (%9457:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=862)]) -> (%9463:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=870)]) { + linalg.CPU.LinearOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=862, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=865, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=864, solved=0))] (%9457:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=862)]) -> (%9458:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=865)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=862, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=867, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=866, solved=0))] (%9457:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=862)]) -> (%9459:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=867)]) + linalg.CPU.SigmoidOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, 
quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=867, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=868, solved=0), )] (%9459:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=867)]) -> (%9460:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=868)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=867, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=868, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=867, solved=0), )] (%9459:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=867)], %9460:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=868)]) -> (%9461:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=867)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=867, solved=0), 
inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=865, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=867, solved=0), )] (%9461:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=867)], %9458:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=865)]) -> (%9462:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=867)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=867, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=870, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=869, solved=0))] (%9462:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=867)]) -> (%9463:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=870)]) + cf.ReturnOp (%9463:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 
65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=870)]) -> () } } graph.SubGraphOp @model.layers.23 [using_qnn:true, symbol:model.layers.23] { - (%9018:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=847)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8061:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=26)], %8062:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=54)]) -> (%9059:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=881)], %9032:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=860)], %9034:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=862)]) { - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=847), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=848), 
weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=849))] (%9018:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=847)]) -> (%9019:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=848)]) - graph.CallGraphOp @model.layers.23.self_attn (%9019:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=848)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8061:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=26)], %8062:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=54)]) -> (%9051:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=872)], %9032:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=860)], %9034:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, 
quant_to_type: Int8, scale_type: Float32), uuid=862)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=872), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=847), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=872), )] (%9051:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=872)], %9018:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=847)]) -> (%9052:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=872)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=872), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=873), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=874))] (%9052:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=872)]) -> (%9053:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=873)]) - graph.CallGraphOp @model.layers.23.mlp (%9053:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, 
scale_type: Float32), uuid=873)]) -> (%9058:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=881)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=881), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=872), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=881), )] (%9058:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=881)], %9052:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=872)]) -> (%9059:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=881)]) - cf.ReturnOp (%9059:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=881)], %9032:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=860)], %9034:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=862)]) -> () + (%9464:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8254:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=26)], %8255:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=54)]) -> (%9516:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %9487:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=884)], %9489:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=886)]) { + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=871, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=872, solved=0))] (%9464:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=60)]) -> (%9465:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=871)]) + graph.CallGraphOp @model.layers.23.self_attn (%9465:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=871)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8254:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=26)], %8255:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=54)]) -> (%9507:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=896)], %9487:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=884)], %9489:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=886)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=896, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%9464:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %9507:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=896)]) -> (%9508:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=897, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=898, solved=0))] (%9508:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%9509:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=897)]) + graph.CallGraphOp @model.layers.23.mlp (%9509:tensor<[1, 32, 2048], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=897)]) -> (%9515:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=905)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=905, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%9508:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %9515:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=905)]) -> (%9516:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + cf.ReturnOp (%9516:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %9487:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=884)], %9489:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: 
UInt8, scale_type: Float32), uuid=886)]) -> () } } graph.SubGraphOp @model.layers.23.self_attn [using_qnn:true, symbol:model.layers.23.self_attn] { - (%9019:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=848)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8061:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=26)], %8062:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=54)]) -> (%9051:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=872)], %9032:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=860)], %9034:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=862)]) { - linalg.CPU.LinearOp (%9019:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=848)]) -> (%9020:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=854)]) - linalg.CPU.LinearOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=848), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=851), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=850))] (%9019:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=848)]) -> (%9021:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=851)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=848), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=853), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=852))] (%9019:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=848)]) -> (%9022:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=853)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=854), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=854), )] (%9020:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=854)]) -> (%9020:tensor<[1, 32, 16, 128], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=854)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=854), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=854), )] (%9020:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=854)]) -> (%9023:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=854)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=851), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=851), )] (%9021:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=851)]) -> (%9021:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=851)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=851), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=851), )] (%9021:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=851)]) -> (%9024:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=851)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=853), 
outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=853), )] (%9022:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=853)]) -> (%9022:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=853)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=853), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=853), )] (%9022:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=853)]) -> (%9025:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=853)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=854), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=855), weight_weight:QuantSpec(Raw(type: Int16PerTensor), uuid=856))] (%9023:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=854)]) -> (%9026:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=855)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=851), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=857), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=858))] (%9024:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=851)]) -> (%9027:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=857)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=855), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=855), )] (%9026:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=855)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%9028:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=855)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=857), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=857), )] (%9027:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=857)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%9029:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=857)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=857), outputs_0:QuantSpec(Raw(type: Float16), uuid=859), )] (%9029:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=857)]) -> (%9030:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=859)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=859), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=860), )] (%9030:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=859)]) -> (%9031:tensor<[1, 8, 32, 128], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=860)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=860), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=860), )] (%9031:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=860)]) -> (%9032:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=860)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=853), outputs_0:QuantSpec(Raw(type: Float16), uuid=861), )] (%9025:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=853)]) -> (%9033:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=861)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=861), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=862), )] (%9033:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=861)]) -> (%9034:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=862)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=26), 
inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=860), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=26), )] (%8061:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=26)], %9032:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=860)]) -> (%9035:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=26)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=54), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=862), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=54), )] (%8062:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=54)], %9034:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=862)]) -> (%9036:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=54)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=26), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=26), )] 
(%9035:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=26)]) -> (%9037:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=26)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=54), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=54), )] (%9036:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=54)]) -> (%9038:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=54)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=855), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=26), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=863), )] (%9028:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=855)], %9037:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=26)]) -> (%9039:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=863)]) - linalg.CPU.MulOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=863), inputs_1:QuantSpec(Raw(type: Float32), uuid=864), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=863), )] (%9039:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=863)], %9040:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=864), constant:[0.088388346]]) -> (%9041:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=863)]) - linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=863), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=865), )] (%9041:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=863)]) -> (%9042:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=865)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=865), inputs_1:QuantSpec(Raw(type: Int16), uuid=866), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=865), )] (%9042:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: 
Float32), uuid=865)], %9043:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=866), constant:[-20]]) -> (%9044:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=865)]) - linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=867), outputs_0:QuantSpec(Raw(type: UInt8), uuid=868), )] (%8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %9045:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=867), constant:[0.96484375]]) -> (%9046:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=868)]) - linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=868), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=863), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=865), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=865), )] (%9046:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=868)], %9041:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=863)], %9044:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=865)]) -> (%9047:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=865)]) - linalg.CPU.SoftmaxOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=865), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=869), )] (%9047:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=865)]) -> (%9048:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=869)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=869), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=54), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=870), )] (%9048:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=869)], %9038:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=54)]) -> (%9049:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=870)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=870), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=870), )] (%9049:tensor<[1, 16, 32, 128], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=870)]) -> (%9050:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=870)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=870), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=870), )] (%9050:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=870)]) -> (%9050:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=870)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=870), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=872), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=871))] (%9050:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=870)]) -> (%9051:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=872)]) - cf.ReturnOp (%9051:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=872)], %9032:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=860)], %9034:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=862)]) -> () + (%9465:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=871)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8254:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=26)], %8255:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=54)]) -> (%9507:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=896)], %9487:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=884)], %9489:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=886)]) { + linalg.CPU.LinearOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=871, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=874, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=873, solved=0))] (%9465:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=871)]) -> (%9466:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=874)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=871, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=876, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=875, solved=0))] (%9465:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=871)]) -> (%9467:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=876)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, 
quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=871, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=878, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=877, solved=0))] (%9465:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=871)]) -> (%9468:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=878)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=874, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=874, solved=0), )] (%9466:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=874)]) -> (%9466:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=874)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=874, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=874, solved=0), )] (%9466:tensor<[1, 32, 16, 
128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=874)]) -> (%9469:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=874)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=876, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=876, solved=0), )] (%9467:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=876)]) -> (%9467:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=876)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=876, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=876, solved=0), )] (%9467:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=876)]) -> (%9470:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=876)]) + linalg.CPU.ViewOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=878, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=878, solved=0), )] (%9468:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=878)]) -> (%9468:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=878)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=878, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=878, solved=0), )] (%9468:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=878)]) -> (%9471:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=878)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=874, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=879, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: 
Float32, zero_point_type: Int32), uuid=880, solved=0))] (%9469:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=874)]) -> (%9472:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=879)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=876, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=881, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=882, solved=0))] (%9470:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=876)]) -> (%9473:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=881)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), )] (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, 
quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), )] (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=879, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=879, solved=0), )] (%9472:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=879)]) -> (%9472:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=879)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=879, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=879, solved=0), )] (%9472:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 
65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=879)]) -> (%9472:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=879)]) + linalg.CPU.NegOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=879, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=879, solved=0), )] (%9472:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=879)]) -> (%9474:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=879)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=879, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=879, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=879, solved=0), )] (%9474:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=879)], %9472:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=879)]) -> 
(%9475:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=879)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=879, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=879, solved=0), )] (%9475:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=879)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%9476:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=879)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=879, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=879, solved=0), )] (%9472:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=879)], %8268:tensor<[1, 1, 
32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%9477:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=879)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=879, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=879, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=879, solved=0), )] (%9477:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=879)], %9476:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=879)]) -> (%9478:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=879)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=881, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=881, solved=0), )] (%9473:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: 
UInt16, scale_type: Float32, zero_point_type: Int32), uuid=881)]) -> (%9473:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=881)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=881, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=881, solved=0), )] (%9473:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=881)]) -> (%9473:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=881)]) + linalg.CPU.NegOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=881, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=881, solved=0), )] (%9473:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=881)]) -> (%9479:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=881)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: 
Int32), uuid=881, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=881, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=881, solved=0), )] (%9479:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=881)], %9473:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=881)]) -> (%9480:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=881)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=881, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=881, solved=0), )] (%9480:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=881)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%9481:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=881)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=881, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=881, solved=0), )] (%9473:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=881)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%9482:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=881)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=881, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=881, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=881, solved=0), )] (%9482:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=881)], %9481:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=881)]) -> (%9483:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=881)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=881, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=883, solved=0), )] (%9483:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=881)]) -> (%9484:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=883)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=883, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=884, solved=0), )] (%9484:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=883)]) -> (%9485:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=884)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=884, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=884, solved=0), )] (%9485:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=884)]) -> (%9487:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), 
uuid=884)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=878, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=885, solved=0), )] (%9471:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=878)]) -> (%9488:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=885)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=885, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=886, solved=0), )] (%9488:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=885)]) -> (%9489:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=886)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=26, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=884, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=26, solved=0), )] (%8254:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=26)], %9487:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=884)]) -> (%9491:tensor<[1, 8, 128, 1024], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=26)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=54, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=886, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=54, solved=0), )] (%8255:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=54)], %9489:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=886)]) -> (%9492:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=54)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=26, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=26, solved=0), )] (%9491:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=26)]) -> (%9493:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=26)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=54, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, 
quant_to_type: UInt8, scale_type: Float32), uuid=54, solved=0), )] (%9492:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=54)]) -> (%9494:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=54)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=879, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=26, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=887, solved=0), )] (%9478:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=879)], %9493:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=26)]) -> (%9495:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=887)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=887, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=888, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), 
uuid=887, solved=0), )] (%9495:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=887)], %9496:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=888), constant:[0.088388346]]) -> (%9497:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=887)]) + linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=887, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=889, solved=0), )] (%9497:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=887)]) -> (%9498:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=889)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=889, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=890, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=889, solved=0), )] (%9498:tensor<[1, 16, 32, 1], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=889)], %9499:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=890), constant:[-20]]) -> (%9500:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=889)]) + linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2, solved=0), inputs_1:QuantSpec(Raw(type: UInt16), uuid=891, solved=0), outputs_0:QuantSpec(Raw(type: UInt8), uuid=892, solved=0), )] (%8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %9501:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=891), constant:[0]]) -> (%9502:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=892)]) + linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=892, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=887, solved=0), inputs_2:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=889, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=889, solved=0), )] (%9502:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=892)], %9497:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=887)], %9500:tensor<[1, 16, 32, 1], 
UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=889)]) -> (%9503:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=889)]) + linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=889, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=893, solved=0), )] (%9503:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=889)]) -> (%9504:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=893)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=893, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=54, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=894, solved=0), )] (%9504:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=893)], %9494:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, 
scale_type: Float32), uuid=54)]) -> (%9505:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=894)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=894, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=894, solved=0), )] (%9505:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=894)]) -> (%9506:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=894)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=894, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=894, solved=0), )] (%9506:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=894)]) -> (%9506:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=894)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=894, solved=0), 
outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=896, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=895, solved=0))] (%9506:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=894)]) -> (%9507:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=896)]) + cf.ReturnOp (%9507:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=896)], %9487:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=884)], %9489:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=886)]) -> () } } graph.SubGraphOp @model.layers.23.mlp [using_qnn:true, symbol:model.layers.23.mlp] { - (%9053:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=873)]) -> (%9058:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=881)]) { - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=873), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=876), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=875))] (%9053:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=873)]) -> (%9054:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=876)]) - linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=876), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=877), )] (%9054:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=876)]) -> (%9055:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=877)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=873), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=879), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=878))] (%9053:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=873)]) -> (%9056:tensor<[1, 32, 6144], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=879)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=877), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=879), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=877), )] (%9055:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=877)], %9056:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=879)]) -> (%9057:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=877)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=877), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=881), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=880))] (%9057:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=877)]) -> (%9058:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=881)]) - cf.ReturnOp (%9058:tensor<[1, 32, 2048], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=881)]) -> () + (%9509:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=897)]) -> (%9515:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=905)]) { + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=897, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=900, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=899, solved=0))] (%9509:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=897)]) -> (%9510:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=900)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=897, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=902, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, 
scale_1_type: Float32), uuid=901, solved=0))] (%9509:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=897)]) -> (%9511:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=902)]) + linalg.CPU.SigmoidOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=902, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=903, solved=0), )] (%9511:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=902)]) -> (%9512:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=903)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=902, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=903, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=902, solved=0), )] (%9511:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=902)], %9512:tensor<[1, 32, 6144], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=903)]) -> (%9513:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=902)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=902, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=900, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=902, solved=0), )] (%9513:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=902)], %9510:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=900)]) -> (%9514:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=902)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=902, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=905, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: 
Float32), uuid=904, solved=0))] (%9514:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=902)]) -> (%9515:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=905)]) + cf.ReturnOp (%9515:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=905)]) -> () } } graph.SubGraphOp @model.layers.24 [using_qnn:true, symbol:model.layers.24] { - (%9059:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=881)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8063:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=27)], %8064:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=55)]) -> (%9100:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=915)], %9073:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 
-128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=894)], %9075:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=896)]) { - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=881), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=882), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=883))] (%9059:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=881)]) -> (%9060:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=882)]) - graph.CallGraphOp @model.layers.24.self_attn (%9060:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=882)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8063:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=27)], %8064:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, 
quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=55)]) -> (%9092:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=906)], %9073:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=894)], %9075:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=896)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=906), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=881), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=906), )] (%9092:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=906)], %9059:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=881)]) -> (%9093:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=906)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=906), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=907), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=908))] 
(%9093:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=906)]) -> (%9094:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=907)]) - graph.CallGraphOp @model.layers.24.mlp (%9094:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=907)]) -> (%9099:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=915)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=915), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=906), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=915), )] (%9099:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=915)], %9093:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=906)]) -> (%9100:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=915)]) - cf.ReturnOp (%9100:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=915)], %9073:tensor<[1, 8, 128, 32], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=894)], %9075:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=896)]) -> () + (%9516:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8256:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=27)], %8257:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=55)]) -> (%9568:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %9539:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=919)], %9541:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=921)]) { + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, 
scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=906, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=907, solved=0))] (%9516:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%9517:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=906)]) + graph.CallGraphOp @model.layers.24.self_attn (%9517:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=906)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8256:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=27)], %8257:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=55)]) -> (%9559:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=931)], %9539:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=919)], %9541:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=921)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=931, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%9516:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %9559:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=931)]) -> (%9560:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=932, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: 
Int32), uuid=933, solved=0))] (%9560:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%9561:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=932)]) + graph.CallGraphOp @model.layers.24.mlp (%9561:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=932)]) -> (%9567:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=940)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=940, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%9560:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %9567:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=940)]) -> (%9568:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) 
+ cf.ReturnOp (%9568:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %9539:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=919)], %9541:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=921)]) -> () } } graph.SubGraphOp @model.layers.24.self_attn [using_qnn:true, symbol:model.layers.24.self_attn] { - (%9060:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=882)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8063:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=27)], %8064:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=55)]) -> (%9092:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=906)], %9073:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=894)], 
%9075:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=896)]) { - linalg.CPU.LinearOp (%9060:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=882)]) -> (%9061:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=888)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=882), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=885), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=884))] (%9060:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=882)]) -> (%9062:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=885)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=882), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=887), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=886))] (%9060:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=882)]) -> (%9063:tensor<[1, 
32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=887)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=888), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=888), )] (%9061:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=888)]) -> (%9061:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=888)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=888), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=888), )] (%9061:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=888)]) -> (%9064:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=888)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=885), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=885), )] (%9062:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=885)]) -> (%9062:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=885)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=885), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=885), )] (%9062:tensor<[1, 32, 8, 128], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=885)]) -> (%9065:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=885)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=887), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=887), )] (%9063:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=887)]) -> (%9063:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=887)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=887), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=887), )] (%9063:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=887)]) -> (%9066:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=887)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=888), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=889), weight_weight:QuantSpec(Raw(type: Int16PerTensor), uuid=890))] (%9064:tensor<[1, 16, 32, 128], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=888)]) -> (%9067:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=889)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=885), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=891), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=892))] (%9065:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=885)]) -> (%9068:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=891)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=889), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=889), )] (%9067:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=889)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%9069:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=889)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=891), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=891), )] (%9068:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=891)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%9070:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=891)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=891), outputs_0:QuantSpec(Raw(type: Float16), uuid=893), )] (%9070:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=891)]) -> (%9071:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=893)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=893), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=894), )] (%9071:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=893)]) -> (%9072:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=894)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=894), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=894), )] (%9072:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=894)]) -> (%9073:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=894)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=887), outputs_0:QuantSpec(Raw(type: Float16), uuid=895), )] (%9066:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=887)]) -> (%9074:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=895)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=895), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), 
uuid=896), )] (%9074:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=895)]) -> (%9075:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=896)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=27), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=894), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=27), )] (%8063:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=27)], %9073:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=894)]) -> (%9076:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=27)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=55), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=896), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=55), )] (%8064:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=55)], %9075:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=896)]) -> 
(%9077:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=55)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=27), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=27), )] (%9076:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=27)]) -> (%9078:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=27)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=55), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=55), )] (%9077:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=55)]) -> (%9079:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=55)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=889), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=27), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=897), )] (%9069:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 
-32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=889)], %9078:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=27)]) -> (%9080:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=897)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=897), inputs_1:QuantSpec(Raw(type: Float32), uuid=898), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=897), )] (%9080:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=897)], %9081:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=898), constant:[0.088388346]]) -> (%9082:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=897)]) - linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=897), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=899), )] (%9082:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=897)]) -> (%9083:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=899)]) - linalg.CPU.AddOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=899), inputs_1:QuantSpec(Raw(type: Int16), uuid=900), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=899), )] (%9083:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=899)], %9084:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=900), constant:[-20]]) -> (%9085:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=899)]) - linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=901), outputs_0:QuantSpec(Raw(type: UInt8), uuid=902), )] (%8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %9086:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=901), constant:[0.07910156]]) -> (%9087:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=902)]) - linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=902), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=897), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=899), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=899), )] (%9087:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=902)], %9082:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=897)], %9085:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=899)]) -> (%9088:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=899)]) - linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=899), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=903), )] (%9088:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=899)]) -> (%9089:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=903)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=903), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=55), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=904), )] (%9089:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=903)], %9079:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=55)]) -> (%9090:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 
32767, quant_to_type: Int16, scale_type: Float32), uuid=904)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=904), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=904), )] (%9090:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=904)]) -> (%9091:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=904)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=904), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=904), )] (%9091:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=904)]) -> (%9091:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=904)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=904), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=906), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=905))] (%9091:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=904)]) -> (%9092:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=906)]) - cf.ReturnOp (%9092:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=906)], %9073:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=894)], %9075:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=896)]) -> () + (%9517:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=906)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8256:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=27)], %8257:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=55)]) -> (%9559:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=931)], 
%9539:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=919)], %9541:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=921)]) { + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=906, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=909, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=908, solved=0))] (%9517:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=906)]) -> (%9518:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=909)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=906, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=911, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=910, solved=0))] (%9517:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: 
Float32, zero_point_type: Int32), uuid=906)]) -> (%9519:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=911)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=906, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=913, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=912, solved=0))] (%9517:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=906)]) -> (%9520:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=913)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=909, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=909, solved=0), )] (%9518:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=909)]) -> (%9518:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=909)]) + linalg.CPU.TransposeOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=909, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=909, solved=0), )] (%9518:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=909)]) -> (%9521:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=909)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=911, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=911, solved=0), )] (%9519:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=911)]) -> (%9519:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=911)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=911, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=911, solved=0), )] (%9519:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, 
quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=911)]) -> (%9522:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=911)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=913, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=913, solved=0), )] (%9520:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=913)]) -> (%9520:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=913)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=913, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=913, solved=0), )] (%9520:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=913)]) -> (%9523:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=913)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, 
scale_type: Float32, zero_point_type: Int32), uuid=909, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=914, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=915, solved=0))] (%9521:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=909)]) -> (%9524:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=914)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=911, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=916, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=917, solved=0))] (%9522:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=911)]) -> (%9525:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=916)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, 
scale_type: Float32), uuid=65, solved=0), )] (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), )] (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=914, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=914, solved=0), )] (%9524:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=914)]) -> (%9524:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=914)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: 
Float32, zero_point_type: Int32), uuid=914, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=914, solved=0), )] (%9524:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=914)]) -> (%9524:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=914)]) + linalg.CPU.NegOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=914, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=914, solved=0), )] (%9524:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=914)]) -> (%9526:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=914)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=914, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=914, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=914, solved=0), )] (%9526:tensor<[1, 16, 32, 64], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=914)], %9524:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=914)]) -> (%9527:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=914)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=914, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=914, solved=0), )] (%9527:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=914)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%9528:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=914)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=914, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), 
outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=914, solved=0), )] (%9524:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=914)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%9529:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=914)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=914, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=914, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=914, solved=0), )] (%9529:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=914)], %9528:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=914)]) -> (%9530:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=914)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, 
quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=916, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=916, solved=0), )] (%9525:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=916)]) -> (%9525:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=916)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=916, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=916, solved=0), )] (%9525:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=916)]) -> (%9525:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=916)]) + linalg.CPU.NegOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=916, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=916, solved=0), )] (%9525:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=916)]) 
-> (%9531:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=916)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=916, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=916, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=916, solved=0), )] (%9531:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=916)], %9525:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=916)]) -> (%9532:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=916)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=916, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=916, solved=0), )] (%9532:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=916)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%9533:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=916)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=916, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=916, solved=0), )] (%9525:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=916)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%9534:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=916)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=916, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=916, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=916, 
solved=0), )] (%9534:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=916)], %9533:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=916)]) -> (%9535:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=916)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=916, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=918, solved=0), )] (%9535:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=916)]) -> (%9536:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=918)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=918, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=919, solved=0), )] (%9536:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=918)]) -> (%9537:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=919)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=919, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, 
scale_type: Float32), uuid=919, solved=0), )] (%9537:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=919)]) -> (%9539:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=919)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=913, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=920, solved=0), )] (%9523:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=913)]) -> (%9540:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=920)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=920, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=921, solved=0), )] (%9540:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=920)]) -> (%9541:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=921)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=27, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=919, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=27, solved=0), )] (%8256:tensor<[1, 8, 128, 992], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=27)], %9539:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=919)]) -> (%9543:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=27)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=55, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=921, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=55, solved=0), )] (%8257:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=55)], %9541:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=921)]) -> (%9544:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=55)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=27, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=27, solved=0), )] (%9543:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=27)]) -> (%9545:tensor<[1, 16, 128, 1024], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=27)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=55, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=55, solved=0), )] (%9544:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=55)]) -> (%9546:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=55)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=914, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=27, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=922, solved=0), )] (%9530:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=914)], %9545:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=27)]) -> (%9547:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=922)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, 
quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=922, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=923, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=922, solved=0), )] (%9547:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=922)], %9548:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=923), constant:[0.088388346]]) -> (%9549:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=922)]) + linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=922, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=924, solved=0), )] (%9549:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=922)]) -> (%9550:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=924)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=924, 
solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=925, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=924, solved=0), )] (%9550:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=924)], %9551:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=925), constant:[-20]]) -> (%9552:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=924)]) + linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2, solved=0), inputs_1:QuantSpec(Raw(type: UInt16), uuid=926, solved=0), outputs_0:QuantSpec(Raw(type: UInt8), uuid=927, solved=0), )] (%8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %9553:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=926), constant:[0]]) -> (%9554:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=927)]) + linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=927, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=922, solved=0), inputs_2:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=924, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=924, solved=0), )] (%9554:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=927)], %9549:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=922)], %9552:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=924)]) -> (%9555:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=924)]) + linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=924, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=928, solved=0), )] (%9555:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=924)]) -> (%9556:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=928)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=928, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=55, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: 
Int32), uuid=929, solved=0), )] (%9556:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=928)], %9546:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=55)]) -> (%9557:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=929)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=929, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=929, solved=0), )] (%9557:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=929)]) -> (%9558:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=929)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=929, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=929, solved=0), )] (%9558:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=929)]) -> (%9558:tensor<[1, 32, 2048], 
UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=929)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=929, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=931, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=930, solved=0))] (%9558:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=929)]) -> (%9559:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=931)]) + cf.ReturnOp (%9559:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=931)], %9539:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=919)], %9541:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=921)]) -> () } } graph.SubGraphOp @model.layers.24.mlp [using_qnn:true, symbol:model.layers.24.mlp] { - (%9094:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=907)]) -> (%9099:tensor<[1, 32, 2048], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=915)]) { - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=907), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=910), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=909))] (%9094:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=907)]) -> (%9095:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=910)]) - linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=910), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=911), )] (%9095:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=910)]) -> (%9096:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=911)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=907), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=913), weight_weight:QuantSpec(LPBQ(quant_min: -8, 
quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=912))] (%9094:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=907)]) -> (%9097:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=913)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=911), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=913), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=911), )] (%9096:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=911)], %9097:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=913)]) -> (%9098:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=911)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=911), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=915), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=914))] (%9098:tensor<[1, 32, 6144], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=911)]) -> (%9099:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=915)]) - cf.ReturnOp (%9099:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=915)]) -> () + (%9561:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=932)]) -> (%9567:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=940)]) { + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=932, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=935, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=934, solved=0))] (%9561:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=932)]) -> (%9562:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=935)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, 
quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=932, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=937, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=936, solved=0))] (%9561:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=932)]) -> (%9563:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=937)]) + linalg.CPU.SigmoidOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=937, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=938, solved=0), )] (%9563:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=937)]) -> (%9564:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=938)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=937, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=938, solved=0), 
outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=937, solved=0), )] (%9563:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=937)], %9564:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=938)]) -> (%9565:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=937)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=937, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=935, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=937, solved=0), )] (%9565:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=937)], %9562:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=935)]) -> (%9566:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=937)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 
65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=937, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=940, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=939, solved=0))] (%9566:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=937)]) -> (%9567:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=940)]) + cf.ReturnOp (%9567:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=940)]) -> () } } graph.SubGraphOp @model.layers.25 [using_qnn:true, symbol:model.layers.25] { - (%9100:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=915)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8065:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=28)], %8066:tensor<[1, 8, 992, 128], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=56)]) -> (%9141:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=949)], %9114:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=928)], %9116:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=930)]) { - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=915), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=916), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=917))] (%9100:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=915)]) -> (%9101:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=916)]) - graph.CallGraphOp @model.layers.25.self_attn (%9101:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=916)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, 
scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8065:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=28)], %8066:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=56)]) -> (%9133:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=940)], %9114:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=928)], %9116:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=930)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=940), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=915), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=940), )] (%9133:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=940)], %9100:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=915)]) -> (%9134:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=940)]) - linalg.CPU.RMSNormOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=940), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=941), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=942))] (%9134:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=940)]) -> (%9135:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=941)]) - graph.CallGraphOp @model.layers.25.mlp (%9135:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=941)]) -> (%9140:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=949)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=949), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=940), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=949), )] (%9140:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=949)], %9134:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=940)]) -> (%9141:tensor<[1, 32, 2048], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=949)]) - cf.ReturnOp (%9141:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=949)], %9114:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=928)], %9116:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=930)]) -> () + (%9568:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8258:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=28)], %8259:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=56)]) -> (%9620:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %9591:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, 
quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=954)], %9593:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=956)]) { + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=941, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=942, solved=0))] (%9568:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%9569:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=941)]) + graph.CallGraphOp @model.layers.25.self_attn (%9569:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=941)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8258:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, 
quant_to_type: Int8, scale_type: Float32), uuid=28)], %8259:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=56)]) -> (%9611:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=966)], %9591:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=954)], %9593:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=956)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=966, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%9568:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %9611:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=966)]) -> (%9612:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, 
quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=967, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=968, solved=0))] (%9612:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%9613:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=967)]) + graph.CallGraphOp @model.layers.25.mlp (%9613:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=967)]) -> (%9619:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=975)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=975, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%9612:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], 
%9619:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=975)]) -> (%9620:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + cf.ReturnOp (%9620:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %9591:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=954)], %9593:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=956)]) -> () } } graph.SubGraphOp @model.layers.25.self_attn [using_qnn:true, symbol:model.layers.25.self_attn] { - (%9101:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=916)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8065:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=28)], %8066:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, 
quant_to_type: Int8, scale_type: Float32), uuid=56)]) -> (%9133:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=940)], %9114:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=928)], %9116:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=930)]) { - linalg.CPU.LinearOp (%9101:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=916)]) -> (%9102:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=922)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=916), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=919), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=918))] (%9101:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=916)]) -> (%9103:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=919)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=916), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: 
Int16, scale_type: Float32), uuid=921), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=920))] (%9101:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=916)]) -> (%9104:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=921)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=922), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=922), )] (%9102:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=922)]) -> (%9102:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=922)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=922), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=922), )] (%9102:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=922)]) -> (%9105:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=922)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=919), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=919), )] (%9103:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=919)]) -> (%9103:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=919)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=919), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=919), )] (%9103:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=919)]) -> (%9106:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=919)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=921), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=921), )] (%9104:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=921)]) -> (%9104:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=921)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=921), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=921), )] (%9104:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=921)]) -> (%9107:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=921)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=922), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=923), weight_weight:QuantSpec(Raw(type: Int16PerTensor), uuid=924))] (%9105:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=922)]) -> (%9108:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=923)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=919), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=925), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=926))] (%9106:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=919)]) -> (%9109:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=925)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=923), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: 
Float32), uuid=923), )] (%9108:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=923)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%9110:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=923)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=925), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=925), )] (%9109:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=925)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%9111:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=925)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=925), outputs_0:QuantSpec(Raw(type: Float16), uuid=927), )] (%9111:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=925)]) -> (%9112:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=927)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=927), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=928), )] (%9112:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=927)]) -> (%9113:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=928)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=928), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=928), )] (%9113:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=928)]) -> (%9114:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=928)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=921), outputs_0:QuantSpec(Raw(type: Float16), uuid=929), )] (%9107:tensor<[1, 8, 32, 128], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=921)]) -> (%9115:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=929)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=929), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=930), )] (%9115:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=929)]) -> (%9116:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=930)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=28), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=928), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=28), )] (%8065:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=28)], %9114:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=928)]) -> (%9117:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=28)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=56), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=930), 
outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=56), )] (%8066:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=56)], %9116:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=930)]) -> (%9118:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=56)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=28), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=28), )] (%9117:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=28)]) -> (%9119:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=28)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=56), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=56), )] (%9118:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=56)]) -> (%9120:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=56)]) - linalg.CPU.MatMulOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=923), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=28), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=931), )] (%9110:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=923)], %9119:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=28)]) -> (%9121:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=931)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=931), inputs_1:QuantSpec(Raw(type: Float32), uuid=932), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=931), )] (%9121:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=931)], %9122:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=932), constant:[0.088388346]]) -> (%9123:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=931)]) - linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=931), outputs_0:QuantSpec(SymPerTensor(quant_min: 
-32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=933), )] (%9123:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=931)]) -> (%9124:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=933)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=933), inputs_1:QuantSpec(Raw(type: Int16), uuid=934), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=933), )] (%9124:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=933)], %9125:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=934), constant:[-20]]) -> (%9126:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=933)]) - linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=935), outputs_0:QuantSpec(Raw(type: UInt8), uuid=936), )] (%8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %9127:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=935), constant:[-0.9921875]]) -> (%9128:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=936)]) - linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=936), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=931), 
inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=933), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=933), )] (%9128:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=936)], %9123:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=931)], %9126:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=933)]) -> (%9129:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=933)]) - linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=933), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=937), )] (%9129:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=933)]) -> (%9130:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=937)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=937), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=56), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=938), )] 
(%9130:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=937)], %9120:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=56)]) -> (%9131:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=938)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=938), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=938), )] (%9131:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=938)]) -> (%9132:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=938)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=938), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=938), )] (%9132:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=938)]) -> (%9132:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=938)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 
32767, quant_to_type: Int16, scale_type: Float32), uuid=938), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=940), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=939))] (%9132:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=938)]) -> (%9133:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=940)]) - cf.ReturnOp (%9133:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=940)], %9114:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=928)], %9116:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=930)]) -> () + (%9569:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=941)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8258:tensor<[1, 8, 128, 992], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=28)], %8259:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=56)]) -> (%9611:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=966)], %9591:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=954)], %9593:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=956)]) { + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=941, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=944, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=943, solved=0))] (%9569:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=941)]) -> (%9570:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=944)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=941, 
solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=946, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=945, solved=0))] (%9569:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=941)]) -> (%9571:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=946)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=941, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=948, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=947, solved=0))] (%9569:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=941)]) -> (%9572:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=948)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=944, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, 
scale_type: Float32, zero_point_type: Int32), uuid=944, solved=0), )] (%9570:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=944)]) -> (%9570:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=944)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=944, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=944, solved=0), )] (%9570:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=944)]) -> (%9573:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=944)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=946, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=946, solved=0), )] (%9571:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=946)]) -> (%9571:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=946)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=946, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=946, solved=0), )] (%9571:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=946)]) -> (%9574:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=946)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=948, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=948, solved=0), )] (%9572:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=948)]) -> (%9572:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=948)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=948, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=948, solved=0), )] (%9572:tensor<[1, 32, 8, 128], 
UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=948)]) -> (%9575:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=948)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=944, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=949, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=950, solved=0))] (%9573:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=944)]) -> (%9576:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=949)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=946, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=951, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=952, solved=0))] (%9574:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: 
Float32, zero_point_type: Int32), uuid=946)]) -> (%9577:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=951)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), )] (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), )] (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=949, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=949, solved=0), )] (%9576:tensor<[1, 16, 32, 128], 
UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=949)]) -> (%9576:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=949)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=949, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=949, solved=0), )] (%9576:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=949)]) -> (%9576:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=949)]) + linalg.CPU.NegOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=949, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=949, solved=0), )] (%9576:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=949)]) -> (%9578:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=949)]) + linalg.CPU.ConcatOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=949, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=949, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=949, solved=0), )] (%9578:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=949)], %9576:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=949)]) -> (%9579:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=949)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=949, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=949, solved=0), )] (%9579:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=949)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> 
(%9580:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=949)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=949, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=949, solved=0), )] (%9576:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=949)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%9581:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=949)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=949, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=949, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=949, solved=0), )] (%9581:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), 
uuid=949)], %9580:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=949)]) -> (%9582:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=949)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=951, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=951, solved=0), )] (%9577:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=951)]) -> (%9577:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=951)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=951, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=951, solved=0), )] (%9577:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=951)]) -> (%9577:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=951)]) + linalg.CPU.NegOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=951, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=951, solved=0), )] (%9577:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=951)]) -> (%9583:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=951)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=951, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=951, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=951, solved=0), )] (%9583:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=951)], %9577:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=951)]) -> (%9584:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=951)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, 
quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=951, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=951, solved=0), )] (%9584:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=951)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%9585:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=951)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=951, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=951, solved=0), )] (%9577:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=951)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%9586:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: 
Float32, zero_point_type: Int32), uuid=951)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=951, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=951, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=951, solved=0), )] (%9586:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=951)], %9585:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=951)]) -> (%9587:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=951)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=951, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=953, solved=0), )] (%9587:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=951)]) -> (%9588:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=953)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=953, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=954, solved=0), )] 
(%9588:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=953)]) -> (%9589:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=954)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=954, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=954, solved=0), )] (%9589:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=954)]) -> (%9591:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=954)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=948, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=955, solved=0), )] (%9575:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=948)]) -> (%9592:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=955)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=955, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=956, solved=0), )] (%9592:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=955)]) -> (%9593:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, 
quant_to_type: UInt8, scale_type: Float32), uuid=956)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=28, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=954, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=28, solved=0), )] (%8258:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=28)], %9591:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=954)]) -> (%9595:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=28)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=56, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=956, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=56, solved=0), )] (%8259:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=56)], %9593:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=956)]) -> (%9596:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=56)]) + 
linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=28, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=28, solved=0), )] (%9595:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=28)]) -> (%9597:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=28)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=56, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=56, solved=0), )] (%9596:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=56)]) -> (%9598:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=56)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=949, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=28, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=957, solved=0), )] (%9582:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=949)], %9597:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=28)]) -> (%9599:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=957)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=957, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=958, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=957, solved=0), )] (%9599:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=957)], %9600:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=958), constant:[0.088388346]]) -> (%9601:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=957)]) + linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=957, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=959, solved=0), )] (%9601:tensor<[1, 16, 32, 1024], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=957)]) -> (%9602:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=959)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=959, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=960, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=959, solved=0), )] (%9602:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=959)], %9603:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=960), constant:[-20]]) -> (%9604:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=959)]) + linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2, solved=0), inputs_1:QuantSpec(Raw(type: UInt16), uuid=961, solved=0), outputs_0:QuantSpec(Raw(type: UInt8), uuid=962, solved=0), )] (%8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %9605:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=961), constant:[0]]) -> (%9606:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), 
uuid=962)]) + linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=962, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=957, solved=0), inputs_2:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=959, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=959, solved=0), )] (%9606:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=962)], %9601:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=957)], %9604:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=959)]) -> (%9607:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=959)]) + linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=959, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=963, solved=0), )] (%9607:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=959)]) -> (%9608:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, 
quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=963)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=963, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=56, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=964, solved=0), )] (%9608:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=963)], %9598:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=56)]) -> (%9609:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=964)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=964, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=964, solved=0), )] (%9609:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=964)]) -> (%9610:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=964)]) + linalg.CPU.ViewOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=964, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=964, solved=0), )] (%9610:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=964)]) -> (%9610:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=964)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=964, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=966, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=965, solved=0))] (%9610:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=964)]) -> (%9611:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=966)]) + cf.ReturnOp (%9611:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=966)], %9591:tensor<[1, 8, 128, 32], UInt8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=954)], %9593:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=956)]) -> () } } graph.SubGraphOp @model.layers.25.mlp [using_qnn:true, symbol:model.layers.25.mlp] { - (%9135:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=941)]) -> (%9140:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=949)]) { - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=941), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=944), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=943))] (%9135:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=941)]) -> (%9136:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=944)]) - linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=944), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=945), )] (%9136:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 
-32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=944)]) -> (%9137:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=945)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=941), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=947), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=946))] (%9135:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=941)]) -> (%9138:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=947)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=945), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=947), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=945), )] (%9137:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=945)], %9138:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=947)]) -> (%9139:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 
32767, quant_to_type: Int16, scale_type: Float32), uuid=945)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=945), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=949), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=948))] (%9139:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=945)]) -> (%9140:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=949)]) - cf.ReturnOp (%9140:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=949)]) -> () + (%9613:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=967)]) -> (%9619:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=975)]) { + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=967, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=970, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: 
UInt4, scale_1_type: Float32), uuid=969, solved=0))] (%9613:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=967)]) -> (%9614:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=970)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=967, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=972, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=971, solved=0))] (%9613:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=967)]) -> (%9615:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=972)]) + linalg.CPU.SigmoidOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=972, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=973, solved=0), )] (%9615:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=972)]) -> (%9616:tensor<[1, 
32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=973)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=972, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=973, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=972, solved=0), )] (%9615:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=972)], %9616:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=973)]) -> (%9617:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=972)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=972, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=970, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=972, solved=0), )] (%9617:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=972)], %9614:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=970)]) -> (%9618:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=972)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=972, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=975, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=974, solved=0))] (%9618:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=972)]) -> (%9619:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=975)]) + cf.ReturnOp (%9619:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=975)]) -> () } } graph.SubGraphOp @model.layers.26 [using_qnn:true, symbol:model.layers.26] { - (%9141:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=949)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8067:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=29)], %8068:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=57)]) -> (%9182:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=983)], %9155:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=962)], %9157:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=964)]) { - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=949), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=950), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=951))] (%9141:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=949)]) -> (%9142:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=950)]) 
- graph.CallGraphOp @model.layers.26.self_attn (%9142:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=950)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8067:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=29)], %8068:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=57)]) -> (%9174:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=974)], %9155:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=962)], %9157:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=964)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=974), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=949), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=974), )] (%9174:tensor<[1, 32, 
2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=974)], %9141:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=949)]) -> (%9175:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=974)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=974), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=975), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=976))] (%9175:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=974)]) -> (%9176:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=975)]) - graph.CallGraphOp @model.layers.26.mlp (%9176:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=975)]) -> (%9181:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=983)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=983), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=974), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=983), )] (%9181:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=983)], %9175:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=974)]) -> (%9182:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=983)]) - cf.ReturnOp (%9182:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=983)], %9155:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=962)], %9157:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=964)]) -> () + (%9620:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8260:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, 
quant_to_type: Int8, scale_type: Float32), uuid=29)], %8261:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=57)]) -> (%9672:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %9643:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=989)], %9645:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=991)]) { + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=976, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=977, solved=0))] (%9620:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%9621:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=976)]) + graph.CallGraphOp @model.layers.26.self_attn (%9621:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=976)], %8267:tensor<[1, 1, 32, 128], 
UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8260:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=29)], %8261:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=57)]) -> (%9663:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1001)], %9643:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=989)], %9645:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=991)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1001, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%9620:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=60)], %9663:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1001)]) -> (%9664:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1002, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1003, solved=0))] (%9664:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%9665:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1002)]) + graph.CallGraphOp @model.layers.26.mlp (%9665:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1002)]) -> (%9671:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1010)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: 
Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1010, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%9664:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %9671:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1010)]) -> (%9672:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + cf.ReturnOp (%9672:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %9643:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=989)], %9645:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=991)]) -> () } } graph.SubGraphOp @model.layers.26.self_attn [using_qnn:true, symbol:model.layers.26.self_attn] { - (%9142:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=950)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8067:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=29)], %8068:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=57)]) -> (%9174:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=974)], %9155:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=962)], %9157:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=964)]) { - linalg.CPU.LinearOp (%9142:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=950)]) -> (%9143:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=956)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=950), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=953), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=952))] (%9142:tensor<[1, 32, 2048], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=950)]) -> (%9144:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=953)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=950), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=955), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=954))] (%9142:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=950)]) -> (%9145:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=955)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=956), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=956), )] (%9143:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=956)]) -> (%9143:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=956)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=956), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=956), )] (%9143:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=956)]) -> (%9146:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=956)]) - linalg.CPU.ViewOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=953), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=953), )] (%9144:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=953)]) -> (%9144:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=953)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=953), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=953), )] (%9144:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=953)]) -> (%9147:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=953)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=955), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=955), )] (%9145:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=955)]) -> (%9145:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=955)]) - 
linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=955), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=955), )] (%9145:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=955)]) -> (%9148:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=955)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=956), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=957), weight_weight:QuantSpec(Raw(type: Int16PerTensor), uuid=958))] (%9146:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=956)]) -> (%9149:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=957)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=953), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=959), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=960))] (%9147:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=953)]) -> (%9150:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=959)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=957), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=957), )] (%9149:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=957)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%9151:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=957)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=959), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=959), )] (%9150:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=959)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%9152:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=959)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=959), outputs_0:QuantSpec(Raw(type: Float16), uuid=961), )] (%9152:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=959)]) -> (%9153:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=961)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=961), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=962), )] (%9153:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=961)]) -> (%9154:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=962)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=962), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=962), )] (%9154:tensor<[1, 8, 32, 128], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=962)]) -> (%9155:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=962)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=955), outputs_0:QuantSpec(Raw(type: Float16), uuid=963), )] (%9148:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=955)]) -> (%9156:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=963)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=963), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=964), )] (%9156:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=963)]) -> (%9157:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=964)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=29), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=962), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=29), )] (%8067:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=29)], %9155:tensor<[1, 8, 128, 32], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=962)]) -> (%9158:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=29)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=57), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=964), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=57), )] (%8068:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=57)], %9157:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=964)]) -> (%9159:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=57)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=29), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=29), )] (%9158:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=29)]) -> (%9160:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=29)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, 
quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=57), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=57), )] (%9159:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=57)]) -> (%9161:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=57)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=957), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=29), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=965), )] (%9151:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=957)], %9160:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=29)]) -> (%9162:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=965)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=965), inputs_1:QuantSpec(Raw(type: Float32), uuid=966), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=965), )] (%9162:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=965)], %9163:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=966), constant:[0.088388346]]) -> (%9164:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=965)]) - linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=965), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=967), )] (%9164:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=965)]) -> (%9165:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=967)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=967), inputs_1:QuantSpec(Raw(type: Int16), uuid=968), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=967), )] (%9165:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=967)], %9166:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=968), constant:[-20]]) -> (%9167:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=967)]) - linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=969), 
outputs_0:QuantSpec(Raw(type: UInt8), uuid=970), )] (%8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %9168:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=969), constant:[0.27929688]]) -> (%9169:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=970)]) - linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=970), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=965), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=967), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=967), )] (%9169:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=970)], %9164:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=965)], %9167:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=967)]) -> (%9170:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=967)]) - linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=967), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=971), )] (%9170:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=967)]) -> (%9171:tensor<[1, 16, 32, 1024], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=971)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=971), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=57), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=972), )] (%9171:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=971)], %9161:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=57)]) -> (%9172:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=972)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=972), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=972), )] (%9172:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=972)]) -> (%9173:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=972)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=972), 
outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=972), )] (%9173:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=972)]) -> (%9173:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=972)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=972), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=974), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=973))] (%9173:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=972)]) -> (%9174:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=974)]) - cf.ReturnOp (%9174:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=974)], %9155:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=962)], %9157:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=964)]) -> () + (%9621:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, 
quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=976)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8260:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=29)], %8261:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=57)]) -> (%9663:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1001)], %9643:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=989)], %9645:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=991)]) { + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=976, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=979, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=978, solved=0))] (%9621:tensor<[1, 32, 
2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=976)]) -> (%9622:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=979)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=976, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=981, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=980, solved=0))] (%9621:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=976)]) -> (%9623:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=981)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=976, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=983, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=982, solved=0))] (%9621:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, 
quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=976)]) -> (%9624:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=983)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=979, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=979, solved=0), )] (%9622:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=979)]) -> (%9622:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=979)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=979, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=979, solved=0), )] (%9622:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=979)]) -> (%9625:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=979)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=981, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=981, solved=0), )] (%9623:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=981)]) -> (%9623:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=981)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=981, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=981, solved=0), )] (%9623:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=981)]) -> (%9626:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=981)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=983, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=983, solved=0), )] (%9624:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=983)]) -> (%9624:tensor<[1, 32, 8, 128], 
UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=983)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=983, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=983, solved=0), )] (%9624:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=983)]) -> (%9627:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=983)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=979, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=984, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=985, solved=0))] (%9625:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=979)]) -> (%9628:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=984)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 
65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=981, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=986, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=987, solved=0))] (%9626:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=981)]) -> (%9629:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=986)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), )] (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), )] (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> 
(%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=984, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=984, solved=0), )] (%9628:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=984)]) -> (%9628:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=984)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=984, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=984, solved=0), )] (%9628:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=984)]) -> (%9628:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=984)]) + linalg.CPU.NegOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=984, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, 
quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=984, solved=0), )] (%9628:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=984)]) -> (%9630:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=984)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=984, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=984, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=984, solved=0), )] (%9630:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=984)], %9628:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=984)]) -> (%9631:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=984)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=984, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), 
outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=984, solved=0), )] (%9631:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=984)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%9632:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=984)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=984, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=984, solved=0), )] (%9628:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=984)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%9633:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=984)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: 
Int32), uuid=984, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=984, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=984, solved=0), )] (%9633:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=984)], %9632:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=984)]) -> (%9634:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=984)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=986, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=986, solved=0), )] (%9629:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=986)]) -> (%9629:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=986)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=986, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 
65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=986, solved=0), )] (%9629:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=986)]) -> (%9629:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=986)]) + linalg.CPU.NegOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=986, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=986, solved=0), )] (%9629:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=986)]) -> (%9635:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=986)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=986, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=986, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=986, solved=0), )] (%9635:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), 
uuid=986)], %9629:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=986)]) -> (%9636:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=986)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=986, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=986, solved=0), )] (%9636:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=986)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%9637:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=986)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=986, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=986, solved=0), )] 
(%9629:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=986)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%9638:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=986)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=986, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=986, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=986, solved=0), )] (%9638:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=986)], %9637:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=986)]) -> (%9639:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=986)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=986, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=988, solved=0), )] 
(%9639:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=986)]) -> (%9640:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=988)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=988, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=989, solved=0), )] (%9640:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=988)]) -> (%9641:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=989)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=989, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=989, solved=0), )] (%9641:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=989)]) -> (%9643:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=989)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=983, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=990, solved=0), )] (%9627:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=983)]) -> (%9644:tensor<[1, 8, 32, 
128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=990)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=990, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=991, solved=0), )] (%9644:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=990)]) -> (%9645:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=991)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=29, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=989, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=29, solved=0), )] (%8260:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=29)], %9643:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=989)]) -> (%9647:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=29)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=57, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=991, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=57, solved=0), )] 
(%8261:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=57)], %9645:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=991)]) -> (%9648:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=57)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=29, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=29, solved=0), )] (%9647:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=29)]) -> (%9649:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=29)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=57, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=57, solved=0), )] (%9648:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=57)]) -> (%9650:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=57)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, 
scale_type: Float32, zero_point_type: Int32), uuid=984, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=29, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=992, solved=0), )] (%9634:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=984)], %9649:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=29)]) -> (%9651:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=992)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=992, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=993, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=992, solved=0), )] (%9651:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=992)], %9652:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=993), constant:[0.088388346]]) -> (%9653:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, 
quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=992)]) + linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=992, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=994, solved=0), )] (%9653:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=992)]) -> (%9654:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=994)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=994, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=995, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=994, solved=0), )] (%9654:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=994)], %9655:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=995), constant:[-20]]) -> (%9656:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=994)]) + 
linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2, solved=0), inputs_1:QuantSpec(Raw(type: UInt16), uuid=996, solved=0), outputs_0:QuantSpec(Raw(type: UInt8), uuid=997, solved=0), )] (%8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %9657:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=996), constant:[0]]) -> (%9658:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=997)]) + linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=997, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=992, solved=0), inputs_2:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=994, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=994, solved=0), )] (%9658:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=997)], %9653:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=992)], %9656:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=994)]) -> (%9659:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=994)]) + linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), 
uuid=994, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=998, solved=0), )] (%9659:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=994)]) -> (%9660:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=998)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=998, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=57, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=999, solved=0), )] (%9660:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=998)], %9650:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=57)]) -> (%9661:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=999)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=999, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=999, solved=0), )] (%9661:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=999)]) -> (%9662:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=999)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=999, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=999, solved=0), )] (%9662:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=999)]) -> (%9662:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=999)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=999, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1001, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1000, solved=0))] (%9662:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=999)]) -> 
(%9663:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1001)]) + cf.ReturnOp (%9663:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1001)], %9643:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=989)], %9645:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=991)]) -> () } } graph.SubGraphOp @model.layers.26.mlp [using_qnn:true, symbol:model.layers.26.mlp] { - (%9176:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=975)]) -> (%9181:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=983)]) { - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=975), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=978), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=977))] (%9176:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=975)]) -> (%9177:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 
32767, quant_to_type: Int16, scale_type: Float32), uuid=978)]) - linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=978), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=979), )] (%9177:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=978)]) -> (%9178:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=979)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=975), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=981), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=980))] (%9176:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=975)]) -> (%9179:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=981)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=979), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=981), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=979), )] 
(%9178:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=979)], %9179:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=981)]) -> (%9180:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=979)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=979), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=983), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=982))] (%9180:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=979)]) -> (%9181:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=983)]) - cf.ReturnOp (%9181:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=983)]) -> () + (%9665:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1002)]) -> (%9671:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1010)]) { + 
linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1002, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1005, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1004, solved=0))] (%9665:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1002)]) -> (%9666:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1005)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1002, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1007, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1006, solved=0))] (%9665:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1002)]) -> (%9667:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1007)]) + linalg.CPU.SigmoidOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1007, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1008, solved=0), )] (%9667:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1007)]) -> (%9668:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1008)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1007, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1008, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1007, solved=0), )] (%9667:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1007)], %9668:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1008)]) -> (%9669:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1007)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, 
quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1007, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1005, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1007, solved=0), )] (%9669:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1007)], %9666:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1005)]) -> (%9670:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1007)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1007, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1010, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1009, solved=0))] (%9670:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1007)]) -> (%9671:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1010)]) + cf.ReturnOp 
(%9671:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1010)]) -> () } } graph.SubGraphOp @model.layers.27 [using_qnn:true, symbol:model.layers.27] { - (%9182:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=983)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8069:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=30)], %8070:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=58)]) -> (%9223:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1017)], %9196:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=996)], %9198:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=998)]) { - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=983), 
outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=984), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=985))] (%9182:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=983)]) -> (%9183:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=984)]) - graph.CallGraphOp @model.layers.27.self_attn (%9183:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=984)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8069:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=30)], %8070:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=58)]) -> (%9215:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1008)], %9196:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=996)], 
%9198:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=998)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1008), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=983), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1008), )] (%9215:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1008)], %9182:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=983)]) -> (%9216:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1008)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1008), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1009), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1010))] (%9216:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1008)]) -> (%9217:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1009)]) - graph.CallGraphOp @model.layers.27.mlp (%9217:tensor<[1, 
32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1009)]) -> (%9222:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1017)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1017), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1008), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1017), )] (%9222:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1017)], %9216:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1008)]) -> (%9223:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1017)]) - cf.ReturnOp (%9223:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1017)], %9196:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=996)], %9198:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=998)]) -> () + (%9672:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: 
UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8262:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=30)], %8263:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=58)]) -> (%9724:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %9695:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1024)], %9697:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1026)]) { + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1011, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1012, solved=0))] (%9672:tensor<[1, 32, 2048], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%9673:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1011)]) + graph.CallGraphOp @model.layers.27.self_attn (%9673:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1011)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8262:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=30)], %8263:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=58)]) -> (%9715:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1036)], %9695:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1024)], %9697:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1026)]) + linalg.CPU.AddOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1036, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%9672:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %9715:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1036)]) -> (%9716:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1037, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1038, solved=0))] (%9716:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%9717:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=1037)]) + graph.CallGraphOp @model.layers.27.mlp (%9717:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1037)]) -> (%9723:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1045)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1045, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%9716:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %9723:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1045)]) -> (%9724:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + cf.ReturnOp (%9724:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %9695:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1024)], 
%9697:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1026)]) -> () } } graph.SubGraphOp @model.layers.27.self_attn [using_qnn:true, symbol:model.layers.27.self_attn] { - (%9183:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=984)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8069:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=30)], %8070:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=58)]) -> (%9215:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1008)], %9196:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=996)], %9198:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=998)]) { - linalg.CPU.LinearOp (%9183:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=984)]) -> 
(%9184:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=990)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=984), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=987), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=986))] (%9183:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=984)]) -> (%9185:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=987)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=984), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=989), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=988))] (%9183:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=984)]) -> (%9186:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=989)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=990), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=990), )] (%9184:tensor<[1, 32, 2048], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=990)]) -> (%9184:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=990)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=990), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=990), )] (%9184:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=990)]) -> (%9187:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=990)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=987), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=987), )] (%9185:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=987)]) -> (%9185:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=987)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=987), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=987), )] (%9185:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=987)]) -> (%9188:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=987)]) - linalg.CPU.ViewOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=989), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=989), )] (%9186:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=989)]) -> (%9186:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=989)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=989), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=989), )] (%9186:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=989)]) -> (%9189:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=989)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=990), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=991), weight_weight:QuantSpec(Raw(type: Int16PerTensor), uuid=992))] (%9187:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=990)]) -> (%9190:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=991)]) - linalg.CPU.RMSNormOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=987), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=993), weight_weight:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=994))] (%9188:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=987)]) -> (%9191:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=993)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=991), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=991), )] (%9190:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=991)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%9192:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: 
Int16, scale_type: Float32), uuid=991)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=993), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=993), )] (%9191:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=993)], %8074:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)], %8075:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%9193:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=993)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=993), outputs_0:QuantSpec(Raw(type: Float16), uuid=995), )] (%9193:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=993)]) -> (%9194:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=995)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=995), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, 
quant_to_type: Int8, scale_type: Float32), uuid=996), )] (%9194:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=995)]) -> (%9195:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=996)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=996), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=996), )] (%9195:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=996)]) -> (%9196:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=996)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=989), outputs_0:QuantSpec(Raw(type: Float16), uuid=997), )] (%9189:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=989)]) -> (%9197:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=997)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=997), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=998), )] (%9197:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=997)]) -> (%9198:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), 
uuid=998)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=30), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=996), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=30), )] (%8069:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=30)], %9196:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=996)]) -> (%9199:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=30)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=58), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=998), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=58), )] (%8070:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=58)], %9198:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=998)]) -> (%9200:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=58)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, 
quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=30), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=30), )] (%9199:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=30)]) -> (%9201:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=30)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=58), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=58), )] (%9200:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=58)]) -> (%9202:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=58)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=991), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=30), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=999), )] (%9192:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=991)], %9201:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=30)]) -> (%9203:tensor<[1, 16, 
32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=999)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=999), inputs_1:QuantSpec(Raw(type: Float32), uuid=1000), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=999), )] (%9203:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=999)], %9204:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=1000), constant:[0.088388346]]) -> (%9205:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=999)]) - linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=999), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1001), )] (%9205:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=999)]) -> (%9206:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1001)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1001), inputs_1:QuantSpec(Raw(type: Int16), uuid=1002), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: 
Float32), uuid=1001), )] (%9206:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1001)], %9207:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=1002), constant:[-20]]) -> (%9208:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1001)]) - linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=1003), outputs_0:QuantSpec(Raw(type: UInt8), uuid=1004), )] (%8014:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %9209:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=1003), constant:[0.890625]]) -> (%9210:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=1004)]) - linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=1004), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=999), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1001), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1001), )] (%9210:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=1004)], %9205:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=999)], %9208:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1001)]) -> (%9211:tensor<[1, 16, 32, 1024], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1001)]) - linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1001), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1005), )] (%9211:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1001)]) -> (%9212:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1005)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1005), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=58), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1006), )] (%9212:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1005)], %9202:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=58)]) -> (%9213:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1006)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1006), 
outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1006), )] (%9213:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1006)]) -> (%9214:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1006)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1006), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1006), )] (%9214:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1006)]) -> (%9214:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1006)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1006), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1008), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1007))] (%9214:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1006)]) -> (%9215:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=1008)]) - cf.ReturnOp (%9215:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1008)], %9196:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=996)], %9198:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=998)]) -> () + (%9673:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1011)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8262:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=30)], %8263:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=58)]) -> (%9715:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1036)], %9695:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1024)], %9697:tensor<[1, 8, 32, 128], UInt8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1026)]) { + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1011, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1014, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1013, solved=0))] (%9673:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1011)]) -> (%9674:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1014)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1011, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1016, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1015, solved=0))] (%9673:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1011)]) -> (%9675:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: 
Float32, zero_point_type: Int32), uuid=1016)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1011, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1018, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1017, solved=0))] (%9673:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1011)]) -> (%9676:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1018)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1014, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1014, solved=0), )] (%9674:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1014)]) -> (%9674:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1014)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1014, solved=0), 
outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1014, solved=0), )] (%9674:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1014)]) -> (%9677:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1014)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1016, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1016, solved=0), )] (%9675:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1016)]) -> (%9675:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1016)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1016, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1016, solved=0), )] (%9675:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1016)]) -> (%9678:tensor<[1, 8, 32, 128], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1016)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1018, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1018, solved=0), )] (%9676:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1018)]) -> (%9676:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1018)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1018, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1018, solved=0), )] (%9676:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1018)]) -> (%9679:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1018)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1014, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, 
quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1019, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1020, solved=0))] (%9677:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1014)]) -> (%9680:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1019)]) + linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1016, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1021, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1022, solved=0))] (%9678:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1016)]) -> (%9681:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1021)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), )] (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), )] (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1019, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1019, solved=0), )] (%9680:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1019)]) -> (%9680:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1019)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1019, solved=0), 
outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1019, solved=0), )] (%9680:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1019)]) -> (%9680:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1019)]) + linalg.CPU.NegOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1019, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1019, solved=0), )] (%9680:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1019)]) -> (%9682:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1019)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1019, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1019, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1019, solved=0), )] (%9682:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, 
quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1019)], %9680:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1019)]) -> (%9683:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1019)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1019, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1019, solved=0), )] (%9683:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1019)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%9684:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1019)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1019, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, 
scale_type: Float32, zero_point_type: Int32), uuid=1019, solved=0), )] (%9680:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1019)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%9685:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1019)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1019, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1019, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1019, solved=0), )] (%9685:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1019)], %9684:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1019)]) -> (%9686:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1019)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1021, 
solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1021, solved=0), )] (%9681:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1021)]) -> (%9681:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1021)]) + linalg.CPU.SliceOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1021, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1021, solved=0), )] (%9681:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1021)]) -> (%9681:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1021)]) + linalg.CPU.NegOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1021, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1021, solved=0), )] (%9681:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1021)]) -> (%9687:tensor<[1, 8, 32, 64], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1021)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1021, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1021, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1021, solved=0), )] (%9687:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1021)], %9681:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1021)]) -> (%9688:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1021)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1021, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1021, solved=0), )] (%9688:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1021)], 
%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%9689:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1021)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1021, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1021, solved=0), )] (%9681:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1021)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%9690:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1021)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1021, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1021, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1021, solved=0), )] 
(%9690:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1021)], %9689:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1021)]) -> (%9691:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1021)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1021, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=1023, solved=0), )] (%9691:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1021)]) -> (%9692:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=1023)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=1023, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1024, solved=0), )] (%9692:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=1023)]) -> (%9693:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1024)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1024, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, 
scale_type: Float32), uuid=1024, solved=0), )] (%9693:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1024)]) -> (%9695:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1024)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1018, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=1025, solved=0), )] (%9679:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1018)]) -> (%9696:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=1025)]) + linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=1025, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1026, solved=0), )] (%9696:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=1025)]) -> (%9697:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1026)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=30, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1024, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=30, solved=0), )] (%8262:tensor<[1, 8, 128, 992], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=30)], %9695:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1024)]) -> (%9699:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=30)]) + linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=58, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1026, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=58, solved=0), )] (%8263:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=58)], %9697:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1026)]) -> (%9700:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=58)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=30, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=30, solved=0), )] (%9699:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=30)]) -> (%9701:tensor<[1, 16, 128, 1024], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=30)]) + linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=58, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=58, solved=0), )] (%9700:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=58)]) -> (%9702:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=58)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1019, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=30, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1027, solved=0), )] (%9686:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1019)], %9701:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=30)]) -> (%9703:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1027)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, 
quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1027, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1028, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1027, solved=0), )] (%9703:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1027)], %9704:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1028), constant:[0.088388346]]) -> (%9705:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1027)]) + linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1027, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1029, solved=0), )] (%9705:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1027)]) -> (%9706:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1029)]) + linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), 
uuid=1029, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1030, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1029, solved=0), )] (%9706:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1029)], %9707:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1030), constant:[-20]]) -> (%9708:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1029)]) + linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2, solved=0), inputs_1:QuantSpec(Raw(type: UInt16), uuid=1031, solved=0), outputs_0:QuantSpec(Raw(type: UInt8), uuid=1032, solved=0), )] (%8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %9709:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=1031), constant:[0]]) -> (%9710:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=1032)]) + linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=1032, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1027, solved=0), inputs_2:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1029, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: 
Float32, zero_point_type: Int32), uuid=1029, solved=0), )] (%9710:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=1032)], %9705:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1027)], %9708:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1029)]) -> (%9711:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1029)]) + linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1029, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1033, solved=0), )] (%9711:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1029)]) -> (%9712:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1033)]) + linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1033, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=58, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: 
Float32, zero_point_type: Int32), uuid=1034, solved=0), )] (%9712:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1033)], %9702:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=58)]) -> (%9713:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1034)]) + linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1034, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1034, solved=0), )] (%9713:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1034)]) -> (%9714:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1034)]) + linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1034, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1034, solved=0), )] (%9714:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), 
uuid=1034)]) -> (%9714:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1034)]) + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1034, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1036, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1035, solved=0))] (%9714:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1034)]) -> (%9715:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1036)]) + cf.ReturnOp (%9715:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1036)], %9695:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1024)], %9697:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1026)]) -> () } } graph.SubGraphOp @model.layers.27.mlp [using_qnn:true, symbol:model.layers.27.mlp] { - (%9217:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: 
Float32), uuid=1009)]) -> (%9222:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1017)]) { - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1009), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1012), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1011))] (%9217:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1009)]) -> (%9218:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1012)]) - linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1012), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1013), )] (%9218:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1012)]) -> (%9219:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1013)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1009), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: 
Float32), uuid=1015), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1014))] (%9217:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1009)]) -> (%9220:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1015)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1013), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1015), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1013), )] (%9219:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1013)], %9220:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1015)]) -> (%9221:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1013)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1013), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1017), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), 
uuid=1016))] (%9221:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1013)]) -> (%9222:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1017)]) - cf.ReturnOp (%9222:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=1017)]) -> () + (%9717:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1037)]) -> (%9723:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1045)]) { + linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1037, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1040, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1039, solved=0))] (%9717:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1037)]) -> (%9718:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1040)]) + linalg.CPU.LinearOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1037, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1042, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1041, solved=0))] (%9717:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1037)]) -> (%9719:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1042)]) + linalg.CPU.SigmoidOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1042, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1043, solved=0), )] (%9719:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1042)]) -> (%9720:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1043)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1042, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: 
UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1043, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1042, solved=0), )] (%9719:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1042)], %9720:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1043)]) -> (%9721:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1042)]) + linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1042, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1040, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1042, solved=0), )] (%9721:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1042)], %9718:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1040)]) -> (%9722:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1042)]) + linalg.CPU.LinearOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1042, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1045, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1044, solved=0))] (%9722:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1042)]) -> (%9723:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1045)]) + cf.ReturnOp (%9723:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1045)]) -> () } } // ╔═════╗ diff --git a/examples/qwen3_qnn_aot/qwen3_qnn_aot_quant_recipe.mir b/examples/qwen3_qnn_aot/qwen3_qnn_aot_quant_recipe.mir index af9a88521..f498128cc 100644 --- a/examples/qwen3_qnn_aot/qwen3_qnn_aot_quant_recipe.mir +++ b/examples/qwen3_qnn_aot/qwen3_qnn_aot_quant_recipe.mir @@ -1,1902 +1,9 @@ @main () -> () { - graph.SubGraphOp @init [symbol:init] { - () -> () { - tensor.CPU.register () -> (%105:tensor<[151936, 2048], Float32, CPU>[@model.embed_tokens.weight][symbol:model.embed_tokens.weight])[symbol:model.embed_tokens.weight] - tensor.CPU.register () -> (%76:tensor<[2048, 2048], Float32, CPU>[@model.layers.0.self_attn.q_proj.weight][symbol:model.layers.0.self_attn.q_proj.weight])[symbol:model.layers.0.self_attn.q_proj.weight] - tensor.CPU.register () -> (%133:tensor<[1024, 2048], Float32, 
CPU>[@model.layers.0.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=66), symbol:model.layers.0.self_attn.k_proj.weight])[symbol:model.layers.0.self_attn.k_proj.weight] - tensor.CPU.register () -> (%179:tensor<[1024, 2048], Float32, CPU>[@model.layers.0.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=68), symbol:model.layers.0.self_attn.v_proj.weight])[symbol:model.layers.0.self_attn.v_proj.weight] - tensor.CPU.register () -> (%269:tensor<[2048, 2048], Float32, CPU>[@model.layers.0.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=85), symbol:model.layers.0.self_attn.o_proj.weight])[symbol:model.layers.0.self_attn.o_proj.weight] - tensor.CPU.register () -> (%9:tensor<[6144, 2048], Float32, CPU>[@model.layers.0.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=88), symbol:model.layers.0.mlp.gate_proj.weight])[symbol:model.layers.0.mlp.gate_proj.weight] - tensor.CPU.register () -> (%111:tensor<[6144, 2048], Float32, CPU>[@model.layers.0.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=91), symbol:model.layers.0.mlp.up_proj.weight])[symbol:model.layers.0.mlp.up_proj.weight] - tensor.CPU.register () -> (%184:tensor<[2048, 6144], Float32, CPU>[@model.layers.0.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, 
quant_to_type: UInt4, scale_1_type: Float32), uuid=93), symbol:model.layers.0.mlp.down_proj.weight])[symbol:model.layers.0.mlp.down_proj.weight] - tensor.CPU.register () -> (%285:tensor<[2048, 2048], Float32, CPU>[@model.layers.1.self_attn.q_proj.weight][symbol:model.layers.1.self_attn.q_proj.weight])[symbol:model.layers.1.self_attn.q_proj.weight] - tensor.CPU.register () -> (%32:tensor<[1024, 2048], Float32, CPU>[@model.layers.1.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=96), symbol:model.layers.1.self_attn.k_proj.weight])[symbol:model.layers.1.self_attn.k_proj.weight] - tensor.CPU.register () -> (%154:tensor<[1024, 2048], Float32, CPU>[@model.layers.1.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=98), symbol:model.layers.1.self_attn.v_proj.weight])[symbol:model.layers.1.self_attn.v_proj.weight] - tensor.CPU.register () -> (%20:tensor<[2048, 2048], Float32, CPU>[@model.layers.1.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=115), symbol:model.layers.1.self_attn.o_proj.weight])[symbol:model.layers.1.self_attn.o_proj.weight] - tensor.CPU.register () -> (%245:tensor<[6144, 2048], Float32, CPU>[@model.layers.1.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=118), symbol:model.layers.1.mlp.gate_proj.weight])[symbol:model.layers.1.mlp.gate_proj.weight] - tensor.CPU.register () -> (%230:tensor<[6144, 2048], Float32, CPU>[@model.layers.1.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, 
block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=121), symbol:model.layers.1.mlp.up_proj.weight])[symbol:model.layers.1.mlp.up_proj.weight] - tensor.CPU.register () -> (%43:tensor<[2048, 6144], Float32, CPU>[@model.layers.1.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=123), symbol:model.layers.1.mlp.down_proj.weight])[symbol:model.layers.1.mlp.down_proj.weight] - tensor.CPU.register () -> (%221:tensor<[2048, 2048], Float32, CPU>[@model.layers.2.self_attn.q_proj.weight][symbol:model.layers.2.self_attn.q_proj.weight])[symbol:model.layers.2.self_attn.q_proj.weight] - tensor.CPU.register () -> (%103:tensor<[1024, 2048], Float32, CPU>[@model.layers.2.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=126), symbol:model.layers.2.self_attn.k_proj.weight])[symbol:model.layers.2.self_attn.k_proj.weight] - tensor.CPU.register () -> (%47:tensor<[1024, 2048], Float32, CPU>[@model.layers.2.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=128), symbol:model.layers.2.self_attn.v_proj.weight])[symbol:model.layers.2.self_attn.v_proj.weight] - tensor.CPU.register () -> (%85:tensor<[2048, 2048], Float32, CPU>[@model.layers.2.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=145), symbol:model.layers.2.self_attn.o_proj.weight])[symbol:model.layers.2.self_attn.o_proj.weight] - tensor.CPU.register () -> (%252:tensor<[6144, 2048], Float32, 
CPU>[@model.layers.2.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=148), symbol:model.layers.2.mlp.gate_proj.weight])[symbol:model.layers.2.mlp.gate_proj.weight] - tensor.CPU.register () -> (%24:tensor<[6144, 2048], Float32, CPU>[@model.layers.2.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=151), symbol:model.layers.2.mlp.up_proj.weight])[symbol:model.layers.2.mlp.up_proj.weight] - tensor.CPU.register () -> (%28:tensor<[2048, 6144], Float32, CPU>[@model.layers.2.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=153), symbol:model.layers.2.mlp.down_proj.weight])[symbol:model.layers.2.mlp.down_proj.weight] - tensor.CPU.register () -> (%283:tensor<[2048, 2048], Float32, CPU>[@model.layers.3.self_attn.q_proj.weight][symbol:model.layers.3.self_attn.q_proj.weight])[symbol:model.layers.3.self_attn.q_proj.weight] - tensor.CPU.register () -> (%48:tensor<[1024, 2048], Float32, CPU>[@model.layers.3.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=156), symbol:model.layers.3.self_attn.k_proj.weight])[symbol:model.layers.3.self_attn.k_proj.weight] - tensor.CPU.register () -> (%244:tensor<[1024, 2048], Float32, CPU>[@model.layers.3.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=158), symbol:model.layers.3.self_attn.v_proj.weight])[symbol:model.layers.3.self_attn.v_proj.weight] - 
tensor.CPU.register () -> (%301:tensor<[2048, 2048], Float32, CPU>[@model.layers.3.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=175), symbol:model.layers.3.self_attn.o_proj.weight])[symbol:model.layers.3.self_attn.o_proj.weight] - tensor.CPU.register () -> (%129:tensor<[6144, 2048], Float32, CPU>[@model.layers.3.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=178), symbol:model.layers.3.mlp.gate_proj.weight])[symbol:model.layers.3.mlp.gate_proj.weight] - tensor.CPU.register () -> (%188:tensor<[6144, 2048], Float32, CPU>[@model.layers.3.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=181), symbol:model.layers.3.mlp.up_proj.weight])[symbol:model.layers.3.mlp.up_proj.weight] - tensor.CPU.register () -> (%97:tensor<[2048, 6144], Float32, CPU>[@model.layers.3.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=183), symbol:model.layers.3.mlp.down_proj.weight])[symbol:model.layers.3.mlp.down_proj.weight] - tensor.CPU.register () -> (%164:tensor<[2048, 2048], Float32, CPU>[@model.layers.4.self_attn.q_proj.weight][symbol:model.layers.4.self_attn.q_proj.weight])[symbol:model.layers.4.self_attn.q_proj.weight] - tensor.CPU.register () -> (%148:tensor<[1024, 2048], Float32, CPU>[@model.layers.4.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=186), 
symbol:model.layers.4.self_attn.k_proj.weight])[symbol:model.layers.4.self_attn.k_proj.weight] - tensor.CPU.register () -> (%279:tensor<[1024, 2048], Float32, CPU>[@model.layers.4.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=188), symbol:model.layers.4.self_attn.v_proj.weight])[symbol:model.layers.4.self_attn.v_proj.weight] - tensor.CPU.register () -> (%91:tensor<[2048, 2048], Float32, CPU>[@model.layers.4.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=205), symbol:model.layers.4.self_attn.o_proj.weight])[symbol:model.layers.4.self_attn.o_proj.weight] - tensor.CPU.register () -> (%189:tensor<[6144, 2048], Float32, CPU>[@model.layers.4.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=208), symbol:model.layers.4.mlp.gate_proj.weight])[symbol:model.layers.4.mlp.gate_proj.weight] - tensor.CPU.register () -> (%156:tensor<[6144, 2048], Float32, CPU>[@model.layers.4.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=211), symbol:model.layers.4.mlp.up_proj.weight])[symbol:model.layers.4.mlp.up_proj.weight] - tensor.CPU.register () -> (%153:tensor<[2048, 6144], Float32, CPU>[@model.layers.4.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=213), symbol:model.layers.4.mlp.down_proj.weight])[symbol:model.layers.4.mlp.down_proj.weight] - tensor.CPU.register () -> (%78:tensor<[2048, 2048], Float32, 
CPU>[@model.layers.5.self_attn.q_proj.weight][symbol:model.layers.5.self_attn.q_proj.weight])[symbol:model.layers.5.self_attn.q_proj.weight] - tensor.CPU.register () -> (%72:tensor<[1024, 2048], Float32, CPU>[@model.layers.5.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=216), symbol:model.layers.5.self_attn.k_proj.weight])[symbol:model.layers.5.self_attn.k_proj.weight] - tensor.CPU.register () -> (%289:tensor<[1024, 2048], Float32, CPU>[@model.layers.5.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=218), symbol:model.layers.5.self_attn.v_proj.weight])[symbol:model.layers.5.self_attn.v_proj.weight] - tensor.CPU.register () -> (%264:tensor<[2048, 2048], Float32, CPU>[@model.layers.5.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=235), symbol:model.layers.5.self_attn.o_proj.weight])[symbol:model.layers.5.self_attn.o_proj.weight] - tensor.CPU.register () -> (%4:tensor<[6144, 2048], Float32, CPU>[@model.layers.5.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=238), symbol:model.layers.5.mlp.gate_proj.weight])[symbol:model.layers.5.mlp.gate_proj.weight] - tensor.CPU.register () -> (%308:tensor<[6144, 2048], Float32, CPU>[@model.layers.5.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=241), symbol:model.layers.5.mlp.up_proj.weight])[symbol:model.layers.5.mlp.up_proj.weight] - 
tensor.CPU.register () -> (%74:tensor<[2048, 6144], Float32, CPU>[@model.layers.5.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=243), symbol:model.layers.5.mlp.down_proj.weight])[symbol:model.layers.5.mlp.down_proj.weight] - tensor.CPU.register () -> (%59:tensor<[2048, 2048], Float32, CPU>[@model.layers.6.self_attn.q_proj.weight][symbol:model.layers.6.self_attn.q_proj.weight])[symbol:model.layers.6.self_attn.q_proj.weight] - tensor.CPU.register () -> (%208:tensor<[1024, 2048], Float32, CPU>[@model.layers.6.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=246), symbol:model.layers.6.self_attn.k_proj.weight])[symbol:model.layers.6.self_attn.k_proj.weight] - tensor.CPU.register () -> (%238:tensor<[1024, 2048], Float32, CPU>[@model.layers.6.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=248), symbol:model.layers.6.self_attn.v_proj.weight])[symbol:model.layers.6.self_attn.v_proj.weight] - tensor.CPU.register () -> (%52:tensor<[2048, 2048], Float32, CPU>[@model.layers.6.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=265), symbol:model.layers.6.self_attn.o_proj.weight])[symbol:model.layers.6.self_attn.o_proj.weight] - tensor.CPU.register () -> (%80:tensor<[6144, 2048], Float32, CPU>[@model.layers.6.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=268), 
symbol:model.layers.6.mlp.gate_proj.weight])[symbol:model.layers.6.mlp.gate_proj.weight] - tensor.CPU.register () -> (%276:tensor<[6144, 2048], Float32, CPU>[@model.layers.6.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=271), symbol:model.layers.6.mlp.up_proj.weight])[symbol:model.layers.6.mlp.up_proj.weight] - tensor.CPU.register () -> (%227:tensor<[2048, 6144], Float32, CPU>[@model.layers.6.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=273), symbol:model.layers.6.mlp.down_proj.weight])[symbol:model.layers.6.mlp.down_proj.weight] - tensor.CPU.register () -> (%287:tensor<[2048, 2048], Float32, CPU>[@model.layers.7.self_attn.q_proj.weight][symbol:model.layers.7.self_attn.q_proj.weight])[symbol:model.layers.7.self_attn.q_proj.weight] - tensor.CPU.register () -> (%135:tensor<[1024, 2048], Float32, CPU>[@model.layers.7.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=276), symbol:model.layers.7.self_attn.k_proj.weight])[symbol:model.layers.7.self_attn.k_proj.weight] - tensor.CPU.register () -> (%300:tensor<[1024, 2048], Float32, CPU>[@model.layers.7.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=278), symbol:model.layers.7.self_attn.v_proj.weight])[symbol:model.layers.7.self_attn.v_proj.weight] - tensor.CPU.register () -> (%251:tensor<[2048, 2048], Float32, CPU>[@model.layers.7.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, 
quant_to_type: UInt4, scale_1_type: Float32), uuid=295), symbol:model.layers.7.self_attn.o_proj.weight])[symbol:model.layers.7.self_attn.o_proj.weight] - tensor.CPU.register () -> (%155:tensor<[6144, 2048], Float32, CPU>[@model.layers.7.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=298), symbol:model.layers.7.mlp.gate_proj.weight])[symbol:model.layers.7.mlp.gate_proj.weight] - tensor.CPU.register () -> (%218:tensor<[6144, 2048], Float32, CPU>[@model.layers.7.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=301), symbol:model.layers.7.mlp.up_proj.weight])[symbol:model.layers.7.mlp.up_proj.weight] - tensor.CPU.register () -> (%275:tensor<[2048, 6144], Float32, CPU>[@model.layers.7.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=303), symbol:model.layers.7.mlp.down_proj.weight])[symbol:model.layers.7.mlp.down_proj.weight] - tensor.CPU.register () -> (%165:tensor<[2048, 2048], Float32, CPU>[@model.layers.8.self_attn.q_proj.weight][symbol:model.layers.8.self_attn.q_proj.weight])[symbol:model.layers.8.self_attn.q_proj.weight] - tensor.CPU.register () -> (%194:tensor<[1024, 2048], Float32, CPU>[@model.layers.8.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=306), symbol:model.layers.8.self_attn.k_proj.weight])[symbol:model.layers.8.self_attn.k_proj.weight] - tensor.CPU.register () -> (%181:tensor<[1024, 2048], Float32, CPU>[@model.layers.8.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, 
block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=308), symbol:model.layers.8.self_attn.v_proj.weight])[symbol:model.layers.8.self_attn.v_proj.weight] - tensor.CPU.register () -> (%197:tensor<[2048, 2048], Float32, CPU>[@model.layers.8.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=325), symbol:model.layers.8.self_attn.o_proj.weight])[symbol:model.layers.8.self_attn.o_proj.weight] - tensor.CPU.register () -> (%110:tensor<[6144, 2048], Float32, CPU>[@model.layers.8.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=328), symbol:model.layers.8.mlp.gate_proj.weight])[symbol:model.layers.8.mlp.gate_proj.weight] - tensor.CPU.register () -> (%236:tensor<[6144, 2048], Float32, CPU>[@model.layers.8.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=331), symbol:model.layers.8.mlp.up_proj.weight])[symbol:model.layers.8.mlp.up_proj.weight] - tensor.CPU.register () -> (%106:tensor<[2048, 6144], Float32, CPU>[@model.layers.8.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=333), symbol:model.layers.8.mlp.down_proj.weight])[symbol:model.layers.8.mlp.down_proj.weight] - tensor.CPU.register () -> (%235:tensor<[2048, 2048], Float32, CPU>[@model.layers.9.self_attn.q_proj.weight][symbol:model.layers.9.self_attn.q_proj.weight])[symbol:model.layers.9.self_attn.q_proj.weight] - tensor.CPU.register () -> (%69:tensor<[1024, 2048], Float32, 
CPU>[@model.layers.9.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=336), symbol:model.layers.9.self_attn.k_proj.weight])[symbol:model.layers.9.self_attn.k_proj.weight] - tensor.CPU.register () -> (%120:tensor<[1024, 2048], Float32, CPU>[@model.layers.9.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=338), symbol:model.layers.9.self_attn.v_proj.weight])[symbol:model.layers.9.self_attn.v_proj.weight] - tensor.CPU.register () -> (%205:tensor<[2048, 2048], Float32, CPU>[@model.layers.9.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=355), symbol:model.layers.9.self_attn.o_proj.weight])[symbol:model.layers.9.self_attn.o_proj.weight] - tensor.CPU.register () -> (%263:tensor<[6144, 2048], Float32, CPU>[@model.layers.9.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=358), symbol:model.layers.9.mlp.gate_proj.weight])[symbol:model.layers.9.mlp.gate_proj.weight] - tensor.CPU.register () -> (%102:tensor<[6144, 2048], Float32, CPU>[@model.layers.9.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=361), symbol:model.layers.9.mlp.up_proj.weight])[symbol:model.layers.9.mlp.up_proj.weight] - tensor.CPU.register () -> (%136:tensor<[2048, 6144], Float32, CPU>[@model.layers.9.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, 
scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=363), symbol:model.layers.9.mlp.down_proj.weight])[symbol:model.layers.9.mlp.down_proj.weight] - tensor.CPU.register () -> (%278:tensor<[2048, 2048], Float32, CPU>[@model.layers.10.self_attn.q_proj.weight][symbol:model.layers.10.self_attn.q_proj.weight])[symbol:model.layers.10.self_attn.q_proj.weight] - tensor.CPU.register () -> (%182:tensor<[1024, 2048], Float32, CPU>[@model.layers.10.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=366), symbol:model.layers.10.self_attn.k_proj.weight])[symbol:model.layers.10.self_attn.k_proj.weight] - tensor.CPU.register () -> (%138:tensor<[1024, 2048], Float32, CPU>[@model.layers.10.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=368), symbol:model.layers.10.self_attn.v_proj.weight])[symbol:model.layers.10.self_attn.v_proj.weight] - tensor.CPU.register () -> (%233:tensor<[2048, 2048], Float32, CPU>[@model.layers.10.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=385), symbol:model.layers.10.self_attn.o_proj.weight])[symbol:model.layers.10.self_attn.o_proj.weight] - tensor.CPU.register () -> (%124:tensor<[6144, 2048], Float32, CPU>[@model.layers.10.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=388), symbol:model.layers.10.mlp.gate_proj.weight])[symbol:model.layers.10.mlp.gate_proj.weight] - tensor.CPU.register () -> (%261:tensor<[6144, 2048], Float32, 
CPU>[@model.layers.10.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=391), symbol:model.layers.10.mlp.up_proj.weight])[symbol:model.layers.10.mlp.up_proj.weight] - tensor.CPU.register () -> (%45:tensor<[2048, 6144], Float32, CPU>[@model.layers.10.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=393), symbol:model.layers.10.mlp.down_proj.weight])[symbol:model.layers.10.mlp.down_proj.weight] - tensor.CPU.register () -> (%274:tensor<[2048, 2048], Float32, CPU>[@model.layers.11.self_attn.q_proj.weight][symbol:model.layers.11.self_attn.q_proj.weight])[symbol:model.layers.11.self_attn.q_proj.weight] - tensor.CPU.register () -> (%157:tensor<[1024, 2048], Float32, CPU>[@model.layers.11.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=396), symbol:model.layers.11.self_attn.k_proj.weight])[symbol:model.layers.11.self_attn.k_proj.weight] - tensor.CPU.register () -> (%63:tensor<[1024, 2048], Float32, CPU>[@model.layers.11.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=398), symbol:model.layers.11.self_attn.v_proj.weight])[symbol:model.layers.11.self_attn.v_proj.weight] - tensor.CPU.register () -> (%118:tensor<[2048, 2048], Float32, CPU>[@model.layers.11.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=415), 
symbol:model.layers.11.self_attn.o_proj.weight])[symbol:model.layers.11.self_attn.o_proj.weight] - tensor.CPU.register () -> (%207:tensor<[6144, 2048], Float32, CPU>[@model.layers.11.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=418), symbol:model.layers.11.mlp.gate_proj.weight])[symbol:model.layers.11.mlp.gate_proj.weight] - tensor.CPU.register () -> (%226:tensor<[6144, 2048], Float32, CPU>[@model.layers.11.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=421), symbol:model.layers.11.mlp.up_proj.weight])[symbol:model.layers.11.mlp.up_proj.weight] - tensor.CPU.register () -> (%224:tensor<[2048, 6144], Float32, CPU>[@model.layers.11.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=423), symbol:model.layers.11.mlp.down_proj.weight])[symbol:model.layers.11.mlp.down_proj.weight] - tensor.CPU.register () -> (%217:tensor<[2048, 2048], Float32, CPU>[@model.layers.12.self_attn.q_proj.weight][symbol:model.layers.12.self_attn.q_proj.weight])[symbol:model.layers.12.self_attn.q_proj.weight] - tensor.CPU.register () -> (%297:tensor<[1024, 2048], Float32, CPU>[@model.layers.12.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=426), symbol:model.layers.12.self_attn.k_proj.weight])[symbol:model.layers.12.self_attn.k_proj.weight] - tensor.CPU.register () -> (%94:tensor<[1024, 2048], Float32, CPU>[@model.layers.12.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, 
scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=428), symbol:model.layers.12.self_attn.v_proj.weight])[symbol:model.layers.12.self_attn.v_proj.weight] - tensor.CPU.register () -> (%49:tensor<[2048, 2048], Float32, CPU>[@model.layers.12.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=445), symbol:model.layers.12.self_attn.o_proj.weight])[symbol:model.layers.12.self_attn.o_proj.weight] - tensor.CPU.register () -> (%262:tensor<[6144, 2048], Float32, CPU>[@model.layers.12.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=448), symbol:model.layers.12.mlp.gate_proj.weight])[symbol:model.layers.12.mlp.gate_proj.weight] - tensor.CPU.register () -> (%255:tensor<[6144, 2048], Float32, CPU>[@model.layers.12.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=451), symbol:model.layers.12.mlp.up_proj.weight])[symbol:model.layers.12.mlp.up_proj.weight] - tensor.CPU.register () -> (%22:tensor<[2048, 6144], Float32, CPU>[@model.layers.12.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=453), symbol:model.layers.12.mlp.down_proj.weight])[symbol:model.layers.12.mlp.down_proj.weight] - tensor.CPU.register () -> (%114:tensor<[2048, 2048], Float32, CPU>[@model.layers.13.self_attn.q_proj.weight][symbol:model.layers.13.self_attn.q_proj.weight])[symbol:model.layers.13.self_attn.q_proj.weight] - tensor.CPU.register () -> (%152:tensor<[1024, 2048], Float32, 
CPU>[@model.layers.13.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=456), symbol:model.layers.13.self_attn.k_proj.weight])[symbol:model.layers.13.self_attn.k_proj.weight] - tensor.CPU.register () -> (%15:tensor<[1024, 2048], Float32, CPU>[@model.layers.13.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=458), symbol:model.layers.13.self_attn.v_proj.weight])[symbol:model.layers.13.self_attn.v_proj.weight] - tensor.CPU.register () -> (%250:tensor<[2048, 2048], Float32, CPU>[@model.layers.13.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=475), symbol:model.layers.13.self_attn.o_proj.weight])[symbol:model.layers.13.self_attn.o_proj.weight] - tensor.CPU.register () -> (%247:tensor<[6144, 2048], Float32, CPU>[@model.layers.13.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=478), symbol:model.layers.13.mlp.gate_proj.weight])[symbol:model.layers.13.mlp.gate_proj.weight] - tensor.CPU.register () -> (%98:tensor<[6144, 2048], Float32, CPU>[@model.layers.13.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=481), symbol:model.layers.13.mlp.up_proj.weight])[symbol:model.layers.13.mlp.up_proj.weight] - tensor.CPU.register () -> (%193:tensor<[2048, 6144], Float32, CPU>[@model.layers.13.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, 
scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=483), symbol:model.layers.13.mlp.down_proj.weight])[symbol:model.layers.13.mlp.down_proj.weight] - tensor.CPU.register () -> (%209:tensor<[2048, 2048], Float32, CPU>[@model.layers.14.self_attn.q_proj.weight][symbol:model.layers.14.self_attn.q_proj.weight])[symbol:model.layers.14.self_attn.q_proj.weight] - tensor.CPU.register () -> (%38:tensor<[1024, 2048], Float32, CPU>[@model.layers.14.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=486), symbol:model.layers.14.self_attn.k_proj.weight])[symbol:model.layers.14.self_attn.k_proj.weight] - tensor.CPU.register () -> (%232:tensor<[1024, 2048], Float32, CPU>[@model.layers.14.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=488), symbol:model.layers.14.self_attn.v_proj.weight])[symbol:model.layers.14.self_attn.v_proj.weight] - tensor.CPU.register () -> (%168:tensor<[2048, 2048], Float32, CPU>[@model.layers.14.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=505), symbol:model.layers.14.self_attn.o_proj.weight])[symbol:model.layers.14.self_attn.o_proj.weight] - tensor.CPU.register () -> (%37:tensor<[6144, 2048], Float32, CPU>[@model.layers.14.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=508), symbol:model.layers.14.mlp.gate_proj.weight])[symbol:model.layers.14.mlp.gate_proj.weight] - tensor.CPU.register () -> (%147:tensor<[6144, 2048], Float32, 
CPU>[@model.layers.14.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=511), symbol:model.layers.14.mlp.up_proj.weight])[symbol:model.layers.14.mlp.up_proj.weight] - tensor.CPU.register () -> (%163:tensor<[2048, 6144], Float32, CPU>[@model.layers.14.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=513), symbol:model.layers.14.mlp.down_proj.weight])[symbol:model.layers.14.mlp.down_proj.weight] - tensor.CPU.register () -> (%46:tensor<[2048, 2048], Float32, CPU>[@model.layers.15.self_attn.q_proj.weight][symbol:model.layers.15.self_attn.q_proj.weight])[symbol:model.layers.15.self_attn.q_proj.weight] - tensor.CPU.register () -> (%268:tensor<[1024, 2048], Float32, CPU>[@model.layers.15.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=516), symbol:model.layers.15.self_attn.k_proj.weight])[symbol:model.layers.15.self_attn.k_proj.weight] - tensor.CPU.register () -> (%117:tensor<[1024, 2048], Float32, CPU>[@model.layers.15.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=518), symbol:model.layers.15.self_attn.v_proj.weight])[symbol:model.layers.15.self_attn.v_proj.weight] - tensor.CPU.register () -> (%303:tensor<[2048, 2048], Float32, CPU>[@model.layers.15.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=535), 
symbol:model.layers.15.self_attn.o_proj.weight])[symbol:model.layers.15.self_attn.o_proj.weight] - tensor.CPU.register () -> (%260:tensor<[6144, 2048], Float32, CPU>[@model.layers.15.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=538), symbol:model.layers.15.mlp.gate_proj.weight])[symbol:model.layers.15.mlp.gate_proj.weight] - tensor.CPU.register () -> (%42:tensor<[6144, 2048], Float32, CPU>[@model.layers.15.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=541), symbol:model.layers.15.mlp.up_proj.weight])[symbol:model.layers.15.mlp.up_proj.weight] - tensor.CPU.register () -> (%290:tensor<[2048, 6144], Float32, CPU>[@model.layers.15.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=543), symbol:model.layers.15.mlp.down_proj.weight])[symbol:model.layers.15.mlp.down_proj.weight] - tensor.CPU.register () -> (%17:tensor<[2048, 2048], Float32, CPU>[@model.layers.16.self_attn.q_proj.weight][symbol:model.layers.16.self_attn.q_proj.weight])[symbol:model.layers.16.self_attn.q_proj.weight] - tensor.CPU.register () -> (%228:tensor<[1024, 2048], Float32, CPU>[@model.layers.16.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=546), symbol:model.layers.16.self_attn.k_proj.weight])[symbol:model.layers.16.self_attn.k_proj.weight] - tensor.CPU.register () -> (%66:tensor<[1024, 2048], Float32, CPU>[@model.layers.16.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, 
scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=548), symbol:model.layers.16.self_attn.v_proj.weight])[symbol:model.layers.16.self_attn.v_proj.weight] - tensor.CPU.register () -> (%211:tensor<[2048, 2048], Float32, CPU>[@model.layers.16.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=565), symbol:model.layers.16.self_attn.o_proj.weight])[symbol:model.layers.16.self_attn.o_proj.weight] - tensor.CPU.register () -> (%130:tensor<[6144, 2048], Float32, CPU>[@model.layers.16.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=568), symbol:model.layers.16.mlp.gate_proj.weight])[symbol:model.layers.16.mlp.gate_proj.weight] - tensor.CPU.register () -> (%79:tensor<[6144, 2048], Float32, CPU>[@model.layers.16.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=571), symbol:model.layers.16.mlp.up_proj.weight])[symbol:model.layers.16.mlp.up_proj.weight] - tensor.CPU.register () -> (%248:tensor<[2048, 6144], Float32, CPU>[@model.layers.16.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=573), symbol:model.layers.16.mlp.down_proj.weight])[symbol:model.layers.16.mlp.down_proj.weight] - tensor.CPU.register () -> (%64:tensor<[2048, 2048], Float32, CPU>[@model.layers.17.self_attn.q_proj.weight][symbol:model.layers.17.self_attn.q_proj.weight])[symbol:model.layers.17.self_attn.q_proj.weight] - tensor.CPU.register () -> (%237:tensor<[1024, 2048], Float32, 
CPU>[@model.layers.17.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=576), symbol:model.layers.17.self_attn.k_proj.weight])[symbol:model.layers.17.self_attn.k_proj.weight] - tensor.CPU.register () -> (%6:tensor<[1024, 2048], Float32, CPU>[@model.layers.17.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=578), symbol:model.layers.17.self_attn.v_proj.weight])[symbol:model.layers.17.self_attn.v_proj.weight] - tensor.CPU.register () -> (%125:tensor<[2048, 2048], Float32, CPU>[@model.layers.17.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=595), symbol:model.layers.17.self_attn.o_proj.weight])[symbol:model.layers.17.self_attn.o_proj.weight] - tensor.CPU.register () -> (%177:tensor<[6144, 2048], Float32, CPU>[@model.layers.17.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=598), symbol:model.layers.17.mlp.gate_proj.weight])[symbol:model.layers.17.mlp.gate_proj.weight] - tensor.CPU.register () -> (%26:tensor<[6144, 2048], Float32, CPU>[@model.layers.17.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=601), symbol:model.layers.17.mlp.up_proj.weight])[symbol:model.layers.17.mlp.up_proj.weight] - tensor.CPU.register () -> (%25:tensor<[2048, 6144], Float32, CPU>[@model.layers.17.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, 
scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=603), symbol:model.layers.17.mlp.down_proj.weight])[symbol:model.layers.17.mlp.down_proj.weight] - tensor.CPU.register () -> (%273:tensor<[2048, 2048], Float32, CPU>[@model.layers.18.self_attn.q_proj.weight][symbol:model.layers.18.self_attn.q_proj.weight])[symbol:model.layers.18.self_attn.q_proj.weight] - tensor.CPU.register () -> (%284:tensor<[1024, 2048], Float32, CPU>[@model.layers.18.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=606), symbol:model.layers.18.self_attn.k_proj.weight])[symbol:model.layers.18.self_attn.k_proj.weight] - tensor.CPU.register () -> (%18:tensor<[1024, 2048], Float32, CPU>[@model.layers.18.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=608), symbol:model.layers.18.self_attn.v_proj.weight])[symbol:model.layers.18.self_attn.v_proj.weight] - tensor.CPU.register () -> (%2:tensor<[2048, 2048], Float32, CPU>[@model.layers.18.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=625), symbol:model.layers.18.self_attn.o_proj.weight])[symbol:model.layers.18.self_attn.o_proj.weight] - tensor.CPU.register () -> (%166:tensor<[6144, 2048], Float32, CPU>[@model.layers.18.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=628), symbol:model.layers.18.mlp.gate_proj.weight])[symbol:model.layers.18.mlp.gate_proj.weight] - tensor.CPU.register () -> (%271:tensor<[6144, 2048], Float32, 
CPU>[@model.layers.18.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=631), symbol:model.layers.18.mlp.up_proj.weight])[symbol:model.layers.18.mlp.up_proj.weight] - tensor.CPU.register () -> (%112:tensor<[2048, 6144], Float32, CPU>[@model.layers.18.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=633), symbol:model.layers.18.mlp.down_proj.weight])[symbol:model.layers.18.mlp.down_proj.weight] - tensor.CPU.register () -> (%8:tensor<[2048, 2048], Float32, CPU>[@model.layers.19.self_attn.q_proj.weight][symbol:model.layers.19.self_attn.q_proj.weight])[symbol:model.layers.19.self_attn.q_proj.weight] - tensor.CPU.register () -> (%286:tensor<[1024, 2048], Float32, CPU>[@model.layers.19.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=636), symbol:model.layers.19.self_attn.k_proj.weight])[symbol:model.layers.19.self_attn.k_proj.weight] - tensor.CPU.register () -> (%50:tensor<[1024, 2048], Float32, CPU>[@model.layers.19.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=638), symbol:model.layers.19.self_attn.v_proj.weight])[symbol:model.layers.19.self_attn.v_proj.weight] - tensor.CPU.register () -> (%58:tensor<[2048, 2048], Float32, CPU>[@model.layers.19.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=655), 
symbol:model.layers.19.self_attn.o_proj.weight])[symbol:model.layers.19.self_attn.o_proj.weight] - tensor.CPU.register () -> (%281:tensor<[6144, 2048], Float32, CPU>[@model.layers.19.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=658), symbol:model.layers.19.mlp.gate_proj.weight])[symbol:model.layers.19.mlp.gate_proj.weight] - tensor.CPU.register () -> (%82:tensor<[6144, 2048], Float32, CPU>[@model.layers.19.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=661), symbol:model.layers.19.mlp.up_proj.weight])[symbol:model.layers.19.mlp.up_proj.weight] - tensor.CPU.register () -> (%173:tensor<[2048, 6144], Float32, CPU>[@model.layers.19.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=663), symbol:model.layers.19.mlp.down_proj.weight])[symbol:model.layers.19.mlp.down_proj.weight] - tensor.CPU.register () -> (%280:tensor<[2048, 2048], Float32, CPU>[@model.layers.20.self_attn.q_proj.weight][symbol:model.layers.20.self_attn.q_proj.weight])[symbol:model.layers.20.self_attn.q_proj.weight] - tensor.CPU.register () -> (%253:tensor<[1024, 2048], Float32, CPU>[@model.layers.20.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=666), symbol:model.layers.20.self_attn.k_proj.weight])[symbol:model.layers.20.self_attn.k_proj.weight] - tensor.CPU.register () -> (%239:tensor<[1024, 2048], Float32, CPU>[@model.layers.20.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, 
scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=668), symbol:model.layers.20.self_attn.v_proj.weight])[symbol:model.layers.20.self_attn.v_proj.weight] - tensor.CPU.register () -> (%41:tensor<[2048, 2048], Float32, CPU>[@model.layers.20.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=685), symbol:model.layers.20.self_attn.o_proj.weight])[symbol:model.layers.20.self_attn.o_proj.weight] - tensor.CPU.register () -> (%172:tensor<[6144, 2048], Float32, CPU>[@model.layers.20.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=688), symbol:model.layers.20.mlp.gate_proj.weight])[symbol:model.layers.20.mlp.gate_proj.weight] - tensor.CPU.register () -> (%299:tensor<[6144, 2048], Float32, CPU>[@model.layers.20.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=691), symbol:model.layers.20.mlp.up_proj.weight])[symbol:model.layers.20.mlp.up_proj.weight] - tensor.CPU.register () -> (%123:tensor<[2048, 6144], Float32, CPU>[@model.layers.20.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=693), symbol:model.layers.20.mlp.down_proj.weight])[symbol:model.layers.20.mlp.down_proj.weight] - tensor.CPU.register () -> (%295:tensor<[2048, 2048], Float32, CPU>[@model.layers.21.self_attn.q_proj.weight][symbol:model.layers.21.self_attn.q_proj.weight])[symbol:model.layers.21.self_attn.q_proj.weight] - tensor.CPU.register () -> (%139:tensor<[1024, 2048], Float32, 
CPU>[@model.layers.21.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=696), symbol:model.layers.21.self_attn.k_proj.weight])[symbol:model.layers.21.self_attn.k_proj.weight] - tensor.CPU.register () -> (%142:tensor<[1024, 2048], Float32, CPU>[@model.layers.21.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=698), symbol:model.layers.21.self_attn.v_proj.weight])[symbol:model.layers.21.self_attn.v_proj.weight] - tensor.CPU.register () -> (%115:tensor<[2048, 2048], Float32, CPU>[@model.layers.21.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=715), symbol:model.layers.21.self_attn.o_proj.weight])[symbol:model.layers.21.self_attn.o_proj.weight] - tensor.CPU.register () -> (%259:tensor<[6144, 2048], Float32, CPU>[@model.layers.21.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=718), symbol:model.layers.21.mlp.gate_proj.weight])[symbol:model.layers.21.mlp.gate_proj.weight] - tensor.CPU.register () -> (%162:tensor<[6144, 2048], Float32, CPU>[@model.layers.21.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=721), symbol:model.layers.21.mlp.up_proj.weight])[symbol:model.layers.21.mlp.up_proj.weight] - tensor.CPU.register () -> (%183:tensor<[2048, 6144], Float32, CPU>[@model.layers.21.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, 
scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=723), symbol:model.layers.21.mlp.down_proj.weight])[symbol:model.layers.21.mlp.down_proj.weight] - tensor.CPU.register () -> (%89:tensor<[2048, 2048], Float32, CPU>[@model.layers.22.self_attn.q_proj.weight][symbol:model.layers.22.self_attn.q_proj.weight])[symbol:model.layers.22.self_attn.q_proj.weight] - tensor.CPU.register () -> (%36:tensor<[1024, 2048], Float32, CPU>[@model.layers.22.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=726), symbol:model.layers.22.self_attn.k_proj.weight])[symbol:model.layers.22.self_attn.k_proj.weight] - tensor.CPU.register () -> (%204:tensor<[1024, 2048], Float32, CPU>[@model.layers.22.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=728), symbol:model.layers.22.self_attn.v_proj.weight])[symbol:model.layers.22.self_attn.v_proj.weight] - tensor.CPU.register () -> (%234:tensor<[2048, 2048], Float32, CPU>[@model.layers.22.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=745), symbol:model.layers.22.self_attn.o_proj.weight])[symbol:model.layers.22.self_attn.o_proj.weight] - tensor.CPU.register () -> (%198:tensor<[6144, 2048], Float32, CPU>[@model.layers.22.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=748), symbol:model.layers.22.mlp.gate_proj.weight])[symbol:model.layers.22.mlp.gate_proj.weight] - tensor.CPU.register () -> (%254:tensor<[6144, 2048], Float32, 
CPU>[@model.layers.22.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=751), symbol:model.layers.22.mlp.up_proj.weight])[symbol:model.layers.22.mlp.up_proj.weight] - tensor.CPU.register () -> (%31:tensor<[2048, 6144], Float32, CPU>[@model.layers.22.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=753), symbol:model.layers.22.mlp.down_proj.weight])[symbol:model.layers.22.mlp.down_proj.weight] - tensor.CPU.register () -> (%109:tensor<[2048, 2048], Float32, CPU>[@model.layers.23.self_attn.q_proj.weight][symbol:model.layers.23.self_attn.q_proj.weight])[symbol:model.layers.23.self_attn.q_proj.weight] - tensor.CPU.register () -> (%39:tensor<[1024, 2048], Float32, CPU>[@model.layers.23.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=756), symbol:model.layers.23.self_attn.k_proj.weight])[symbol:model.layers.23.self_attn.k_proj.weight] - tensor.CPU.register () -> (%83:tensor<[1024, 2048], Float32, CPU>[@model.layers.23.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=758), symbol:model.layers.23.self_attn.v_proj.weight])[symbol:model.layers.23.self_attn.v_proj.weight] - tensor.CPU.register () -> (%176:tensor<[2048, 2048], Float32, CPU>[@model.layers.23.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=775), 
symbol:model.layers.23.self_attn.o_proj.weight])[symbol:model.layers.23.self_attn.o_proj.weight] - tensor.CPU.register () -> (%169:tensor<[6144, 2048], Float32, CPU>[@model.layers.23.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=778), symbol:model.layers.23.mlp.gate_proj.weight])[symbol:model.layers.23.mlp.gate_proj.weight] - tensor.CPU.register () -> (%243:tensor<[6144, 2048], Float32, CPU>[@model.layers.23.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=781), symbol:model.layers.23.mlp.up_proj.weight])[symbol:model.layers.23.mlp.up_proj.weight] - tensor.CPU.register () -> (%149:tensor<[2048, 6144], Float32, CPU>[@model.layers.23.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=783), symbol:model.layers.23.mlp.down_proj.weight])[symbol:model.layers.23.mlp.down_proj.weight] - tensor.CPU.register () -> (%11:tensor<[2048, 2048], Float32, CPU>[@model.layers.24.self_attn.q_proj.weight][symbol:model.layers.24.self_attn.q_proj.weight])[symbol:model.layers.24.self_attn.q_proj.weight] - tensor.CPU.register () -> (%61:tensor<[1024, 2048], Float32, CPU>[@model.layers.24.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=786), symbol:model.layers.24.self_attn.k_proj.weight])[symbol:model.layers.24.self_attn.k_proj.weight] - tensor.CPU.register () -> (%81:tensor<[1024, 2048], Float32, CPU>[@model.layers.24.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, 
scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=788), symbol:model.layers.24.self_attn.v_proj.weight])[symbol:model.layers.24.self_attn.v_proj.weight] - tensor.CPU.register () -> (%127:tensor<[2048, 2048], Float32, CPU>[@model.layers.24.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=805), symbol:model.layers.24.self_attn.o_proj.weight])[symbol:model.layers.24.self_attn.o_proj.weight] - tensor.CPU.register () -> (%141:tensor<[6144, 2048], Float32, CPU>[@model.layers.24.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=808), symbol:model.layers.24.mlp.gate_proj.weight])[symbol:model.layers.24.mlp.gate_proj.weight] - tensor.CPU.register () -> (%126:tensor<[6144, 2048], Float32, CPU>[@model.layers.24.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=811), symbol:model.layers.24.mlp.up_proj.weight])[symbol:model.layers.24.mlp.up_proj.weight] - tensor.CPU.register () -> (%34:tensor<[2048, 6144], Float32, CPU>[@model.layers.24.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=813), symbol:model.layers.24.mlp.down_proj.weight])[symbol:model.layers.24.mlp.down_proj.weight] - tensor.CPU.register () -> (%206:tensor<[2048, 2048], Float32, CPU>[@model.layers.25.self_attn.q_proj.weight][symbol:model.layers.25.self_attn.q_proj.weight])[symbol:model.layers.25.self_attn.q_proj.weight] - tensor.CPU.register () -> (%27:tensor<[1024, 2048], Float32, 
CPU>[@model.layers.25.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=816), symbol:model.layers.25.self_attn.k_proj.weight])[symbol:model.layers.25.self_attn.k_proj.weight] - tensor.CPU.register () -> (%121:tensor<[1024, 2048], Float32, CPU>[@model.layers.25.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=818), symbol:model.layers.25.self_attn.v_proj.weight])[symbol:model.layers.25.self_attn.v_proj.weight] - tensor.CPU.register () -> (%150:tensor<[2048, 2048], Float32, CPU>[@model.layers.25.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=835), symbol:model.layers.25.self_attn.o_proj.weight])[symbol:model.layers.25.self_attn.o_proj.weight] - tensor.CPU.register () -> (%249:tensor<[6144, 2048], Float32, CPU>[@model.layers.25.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=838), symbol:model.layers.25.mlp.gate_proj.weight])[symbol:model.layers.25.mlp.gate_proj.weight] - tensor.CPU.register () -> (%159:tensor<[6144, 2048], Float32, CPU>[@model.layers.25.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=841), symbol:model.layers.25.mlp.up_proj.weight])[symbol:model.layers.25.mlp.up_proj.weight] - tensor.CPU.register () -> (%267:tensor<[2048, 6144], Float32, CPU>[@model.layers.25.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, 
scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=843), symbol:model.layers.25.mlp.down_proj.weight])[symbol:model.layers.25.mlp.down_proj.weight] - tensor.CPU.register () -> (%265:tensor<[2048, 2048], Float32, CPU>[@model.layers.26.self_attn.q_proj.weight][symbol:model.layers.26.self_attn.q_proj.weight])[symbol:model.layers.26.self_attn.q_proj.weight] - tensor.CPU.register () -> (%190:tensor<[1024, 2048], Float32, CPU>[@model.layers.26.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=846), symbol:model.layers.26.self_attn.k_proj.weight])[symbol:model.layers.26.self_attn.k_proj.weight] - tensor.CPU.register () -> (%119:tensor<[1024, 2048], Float32, CPU>[@model.layers.26.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=848), symbol:model.layers.26.self_attn.v_proj.weight])[symbol:model.layers.26.self_attn.v_proj.weight] - tensor.CPU.register () -> (%88:tensor<[2048, 2048], Float32, CPU>[@model.layers.26.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=865), symbol:model.layers.26.self_attn.o_proj.weight])[symbol:model.layers.26.self_attn.o_proj.weight] - tensor.CPU.register () -> (%96:tensor<[6144, 2048], Float32, CPU>[@model.layers.26.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=868), symbol:model.layers.26.mlp.gate_proj.weight])[symbol:model.layers.26.mlp.gate_proj.weight] - tensor.CPU.register () -> (%62:tensor<[6144, 2048], Float32, 
CPU>[@model.layers.26.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=871), symbol:model.layers.26.mlp.up_proj.weight])[symbol:model.layers.26.mlp.up_proj.weight] - tensor.CPU.register () -> (%220:tensor<[2048, 6144], Float32, CPU>[@model.layers.26.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=873), symbol:model.layers.26.mlp.down_proj.weight])[symbol:model.layers.26.mlp.down_proj.weight] - tensor.CPU.register () -> (%185:tensor<[2048, 2048], Float32, CPU>[@model.layers.27.self_attn.q_proj.weight][symbol:model.layers.27.self_attn.q_proj.weight])[symbol:model.layers.27.self_attn.q_proj.weight] - tensor.CPU.register () -> (%12:tensor<[1024, 2048], Float32, CPU>[@model.layers.27.self_attn.k_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=876), symbol:model.layers.27.self_attn.k_proj.weight])[symbol:model.layers.27.self_attn.k_proj.weight] - tensor.CPU.register () -> (%54:tensor<[1024, 2048], Float32, CPU>[@model.layers.27.self_attn.v_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=878), symbol:model.layers.27.self_attn.v_proj.weight])[symbol:model.layers.27.self_attn.v_proj.weight] - tensor.CPU.register () -> (%60:tensor<[2048, 2048], Float32, CPU>[@model.layers.27.self_attn.o_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=895), 
symbol:model.layers.27.self_attn.o_proj.weight])[symbol:model.layers.27.self_attn.o_proj.weight] - tensor.CPU.register () -> (%144:tensor<[6144, 2048], Float32, CPU>[@model.layers.27.mlp.gate_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=898), symbol:model.layers.27.mlp.gate_proj.weight])[symbol:model.layers.27.mlp.gate_proj.weight] - tensor.CPU.register () -> (%146:tensor<[6144, 2048], Float32, CPU>[@model.layers.27.mlp.up_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=901), symbol:model.layers.27.mlp.up_proj.weight])[symbol:model.layers.27.mlp.up_proj.weight] - tensor.CPU.register () -> (%195:tensor<[2048, 6144], Float32, CPU>[@model.layers.27.mlp.down_proj.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=903), symbol:model.layers.27.mlp.down_proj.weight])[symbol:model.layers.27.mlp.down_proj.weight] - tensor.CPU.register () -> (%101:tensor<[151936, 2048], Float32, CPU>[@lm_head.weight][quant_recipe:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=906), symbol:lm_head.weight])[symbol:lm_head.weight] - } - } - graph.SubGraphOp @deinit [symbol:deinit] { - () -> () { - - } - } - graph.CallGraphOp @model (%318:tensor<[1, 32], Int64, CPU>[quant_recipe:QuantSpec(Raw(type: Int64), uuid=0)], %376:tensor<[1, 32], Int64, CPU>[quant_recipe:QuantSpec(Raw(type: Int64), uuid=1)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %320:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, 
quant_to_type: Int8, scale_type: Float32), uuid=3)], %322:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=4)], %324:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=5)], %326:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=6)], %328:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=7)], %330:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=8)], %332:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=9)], %334:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=10)], %336:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=11)], %338:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=12)], %340:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=13)], %342:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=14)], %344:tensor<[1, 8, 128, 992], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=15)], %346:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=16)], %348:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=17)], %350:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=18)], %352:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=19)], %354:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=20)], %356:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=21)], %358:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=22)], %360:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=23)], %362:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=24)], %364:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=25)], %366:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, 
scale_type: Float32), uuid=26)], %368:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=27)], %370:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=28)], %372:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=29)], %374:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=30)], %321:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=31)], %323:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=32)], %325:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=33)], %327:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=34)], %329:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=35)], %331:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=36)], %333:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=37)], %335:tensor<[1, 8, 992, 128], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=38)], %337:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=39)], %339:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=40)], %341:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=41)], %343:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=42)], %345:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=43)], %347:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=44)], %349:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=45)], %351:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=46)], %353:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=47)], %355:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=48)], %357:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, 
scale_type: Float32), uuid=49)], %359:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=50)], %361:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=51)], %363:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=52)], %365:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=53)], %367:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=54)], %369:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=55)], %371:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=56)], %373:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=57)], %375:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=58)]) -> (%1530:tensor<[1, 32, 151936], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=907)], %394:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=74)], %435:tensor<[1, 8, 128, 32], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=104)], %476:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=134)], %517:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=164)], %558:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=194)], %599:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=224)], %640:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=254)], %681:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=284)], %722:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=314)], %763:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=344)], %804:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=374)], %845:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=404)], %886:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, 
scale_type: Float32), uuid=434)], %927:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=464)], %968:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=494)], %1009:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=524)], %1050:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=554)], %1091:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=584)], %1132:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=614)], %1173:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=644)], %1214:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=674)], %1255:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=704)], %1296:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=734)], %1337:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=764)], %1378:tensor<[1, 8, 128, 32], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=794)], %1419:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=824)], %1460:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=854)], %1501:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=884)], %396:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=76)], %437:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=106)], %478:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=136)], %519:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=166)], %560:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=196)], %601:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=226)], %642:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=256)], %683:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, 
scale_type: Float32), uuid=286)], %724:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=316)], %765:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=346)], %806:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=376)], %847:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=406)], %888:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=436)], %929:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=466)], %970:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=496)], %1011:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=526)], %1052:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=556)], %1093:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=586)], %1134:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=616)], %1175:tensor<[1, 8, 32, 128], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=646)], %1216:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=676)], %1257:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=706)], %1298:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=736)], %1339:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=766)], %1380:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=796)], %1421:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=826)], %1462:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=856)], %1503:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=886)]) + graph.CallGraphOp @model (%8206:tensor<[1, 32], Int32, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(Raw(type: Int32), uuid=0)], %8264:tensor<[32], Int32, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(Raw(type: Int32), uuid=1)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8208:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, 
quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=3)], %8210:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=4)], %8212:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=5)], %8214:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=6)], %8216:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=7)], %8218:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=8)], %8220:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=9)], %8222:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=10)], %8224:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=11)], %8226:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=12)], %8228:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, 
quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=13)], %8230:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=14)], %8232:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=15)], %8234:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=16)], %8236:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=17)], %8238:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=18)], %8240:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=19)], %8242:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=20)], %8244:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=21)], %8246:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=22)], %8248:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, 
quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=23)], %8250:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=24)], %8252:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=25)], %8254:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=26)], %8256:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=27)], %8258:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=28)], %8260:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=29)], %8262:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=30)], %8209:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=31)], %8211:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=32)], %8213:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, 
quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=33)], %8215:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=34)], %8217:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=35)], %8219:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=36)], %8221:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=37)], %8223:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=38)], %8225:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=39)], %8227:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=40)], %8229:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=41)], %8231:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=42)], %8233:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, 
quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=43)], %8235:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=44)], %8237:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=45)], %8239:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=46)], %8241:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=47)], %8243:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=48)], %8245:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=49)], %8247:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=50)], %8249:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=51)], %8251:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=52)], %8253:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, 
quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=53)], %8255:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=54)], %8257:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=55)], %8259:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=56)], %8261:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=57)], %8263:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=58)]) -> (%9726:tensor<[1, 32, 151936], UInt16PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1049)], %8291:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=79)], %8343:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=114)], %8395:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=149)], %8447:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, 
quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=184)], %8499:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=219)], %8551:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=254)], %8603:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=289)], %8655:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=324)], %8707:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=359)], %8759:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=394)], %8811:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=429)], %8863:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=464)], %8915:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=499)], %8967:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, 
quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=534)], %9019:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=569)], %9071:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=604)], %9123:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=639)], %9175:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=674)], %9227:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=709)], %9279:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=744)], %9331:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=779)], %9383:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=814)], %9435:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=849)], %9487:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, 
quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=884)], %9539:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=919)], %9591:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=954)], %9643:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=989)], %9695:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1024)], %8293:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=81)], %8345:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=116)], %8397:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=151)], %8449:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=186)], %8501:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=221)], %8553:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, 
quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=256)], %8605:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=291)], %8657:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=326)], %8709:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=361)], %8761:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=396)], %8813:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=431)], %8865:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=466)], %8917:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=501)], %8969:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=536)], %9021:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=571)], %9073:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, 
quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=606)], %9125:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=641)], %9177:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=676)], %9229:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=711)], %9281:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=746)], %9333:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=781)], %9385:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=816)], %9437:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=851)], %9489:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=886)], %9541:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=921)], %9593:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, 
quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=956)], %9645:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=991)], %9697:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1026)]) graph.SubGraphOp @model [using_qnn:true, symbol:model] { - (%318:tensor<[1, 32], Int64, CPU>[quant_recipe:QuantSpec(Raw(type: Int64), uuid=0)], %376:tensor<[1, 32], Int64, CPU>[quant_recipe:QuantSpec(Raw(type: Int64), uuid=1)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %320:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=3)], %322:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=4)], %324:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=5)], %326:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=6)], %328:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=7)], %330:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=8)], %332:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, 
quant_to_type: Int8, scale_type: Float32), uuid=9)], %334:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=10)], %336:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=11)], %338:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=12)], %340:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=13)], %342:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=14)], %344:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=15)], %346:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=16)], %348:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=17)], %350:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=18)], %352:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=19)], %354:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=20)], %356:tensor<[1, 8, 128, 992], 
Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=21)], %358:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=22)], %360:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=23)], %362:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=24)], %364:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=25)], %366:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=26)], %368:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=27)], %370:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=28)], %372:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=29)], %374:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=30)], %321:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=31)], %323:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, 
quant_to_type: Int8, scale_type: Float32), uuid=32)], %325:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=33)], %327:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=34)], %329:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=35)], %331:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=36)], %333:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=37)], %335:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=38)], %337:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=39)], %339:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=40)], %341:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=41)], %343:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=42)], %345:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=43)], %347:tensor<[1, 8, 992, 128], 
Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=44)], %349:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=45)], %351:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=46)], %353:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=47)], %355:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=48)], %357:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=49)], %359:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=50)], %361:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=51)], %363:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=52)], %365:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=53)], %367:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=54)], %369:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, 
quant_to_type: Int8, scale_type: Float32), uuid=55)], %371:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=56)], %373:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=57)], %375:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=58)]) -> (%1530:tensor<[1, 32, 151936], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=907)], %394:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=74)], %435:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=104)], %476:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=134)], %517:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=164)], %558:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=194)], %599:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=224)], %640:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=254)], %681:tensor<[1, 8, 128, 32], 
Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=284)], %722:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=314)], %763:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=344)], %804:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=374)], %845:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=404)], %886:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=434)], %927:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=464)], %968:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=494)], %1009:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=524)], %1050:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=554)], %1091:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=584)], %1132:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, 
quant_to_type: Int8, scale_type: Float32), uuid=614)], %1173:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=644)], %1214:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=674)], %1255:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=704)], %1296:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=734)], %1337:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=764)], %1378:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=794)], %1419:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=824)], %1460:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=854)], %1501:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=884)], %396:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=76)], %437:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=106)], %478:tensor<[1, 8, 32, 128], 
Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=136)], %519:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=166)], %560:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=196)], %601:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=226)], %642:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=256)], %683:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=286)], %724:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=316)], %765:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=346)], %806:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=376)], %847:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=406)], %888:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=436)], %929:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, 
quant_to_type: Int8, scale_type: Float32), uuid=466)], %970:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=496)], %1011:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=526)], %1052:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=556)], %1093:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=586)], %1134:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=616)], %1175:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=646)], %1216:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=676)], %1257:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=706)], %1298:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=736)], %1339:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=766)], %1380:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=796)], %1421:tensor<[1, 8, 32, 128], 
Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=826)], %1462:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=856)], %1503:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=886)]) { - linalg.CPU.EmbeddingOp (%318:tensor<[1, 32], Int64, CPU>[quant_recipe:QuantSpec(Raw(type: Int64), uuid=0)]) -> (%377:tensor<[1, 32, 2048], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=59)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float32), uuid=59), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=60), )] (%377:tensor<[1, 32, 2048], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=59)]) -> (%378:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=60)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int64), uuid=1), outputs_0:QuantSpec(Raw(type: Int64), uuid=1), )] (%376:tensor<[1, 32], Int64, CPU>[quant_recipe:QuantSpec(Raw(type: Int64), uuid=1)]) -> (%376:tensor<[32], Int64, CPU>[quant_recipe:QuantSpec(Raw(type: Int64), uuid=1)]) - linalg.CPU.IndexOp (%316:tensor<[1, 1024, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=61)]) -> (%379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)]) - linalg.CPU.IndexOp (%317:tensor<[1, 1024, 128], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=63)]) -> (%380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) - graph.CallGraphOp @model.layers.0 (%378:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=60)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %320:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=3)], %321:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=31)]) -> (%421:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=94)], %394:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=74)], %396:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=76)]) - graph.CallGraphOp @model.layers.1 (%421:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=94)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %322:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=4)], %323:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=32)]) -> (%462:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=124)], %435:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=104)], %437:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=106)]) - graph.CallGraphOp @model.layers.2 (%462:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=124)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, 
CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %324:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=5)], %325:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=33)]) -> (%503:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=154)], %476:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=134)], %478:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=136)]) - graph.CallGraphOp @model.layers.3 (%503:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=154)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %326:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=6)], %327:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=34)]) -> (%544:tensor<[1, 32, 2048], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=184)], %517:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=164)], %519:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=166)]) - graph.CallGraphOp @model.layers.4 (%544:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=184)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %328:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=7)], %329:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=35)]) -> (%585:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=214)], %558:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=194)], %560:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), 
uuid=196)]) - graph.CallGraphOp @model.layers.5 (%585:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=214)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %330:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=8)], %331:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=36)]) -> (%626:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=244)], %599:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=224)], %601:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=226)]) - graph.CallGraphOp @model.layers.6 (%626:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=244)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %332:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=9)], %333:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=37)]) -> (%667:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=274)], %640:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=254)], %642:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=256)]) - graph.CallGraphOp @model.layers.7 (%667:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=274)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %334:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=10)], %335:tensor<[1, 8, 992, 128], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=38)]) -> (%708:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=304)], %681:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=284)], %683:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=286)]) - graph.CallGraphOp @model.layers.8 (%708:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=304)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %336:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=11)], %337:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=39)]) -> (%749:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=334)], %722:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: 
Float32), uuid=314)], %724:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=316)]) - graph.CallGraphOp @model.layers.9 (%749:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=334)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %338:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=12)], %339:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=40)]) -> (%790:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=364)], %763:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=344)], %765:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=346)]) - graph.CallGraphOp @model.layers.10 (%790:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=364)], %379:tensor<[1, 32, 128], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %340:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=13)], %341:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=41)]) -> (%831:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=394)], %804:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=374)], %806:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=376)]) - graph.CallGraphOp @model.layers.11 (%831:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=394)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %342:tensor<[1, 8, 128, 992], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=14)], %343:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=42)]) -> (%872:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=424)], %845:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=404)], %847:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=406)]) - graph.CallGraphOp @model.layers.12 (%872:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=424)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %344:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=15)], %345:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=43)]) -> (%913:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: 
Float32), uuid=454)], %886:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=434)], %888:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=436)]) - graph.CallGraphOp @model.layers.13 (%913:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=454)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %346:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=16)], %347:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=44)]) -> (%954:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=484)], %927:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=464)], %929:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=466)]) - graph.CallGraphOp @model.layers.14 (%954:tensor<[1, 32, 2048], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=484)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %348:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=17)], %349:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=45)]) -> (%995:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=514)], %968:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=494)], %970:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=496)]) - graph.CallGraphOp @model.layers.15 (%995:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=514)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: 
Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %350:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=18)], %351:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=46)]) -> (%1036:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=544)], %1009:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=524)], %1011:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=526)]) - graph.CallGraphOp @model.layers.16 (%1036:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=544)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %352:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=19)], %353:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=47)]) 
-> (%1077:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=574)], %1050:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=554)], %1052:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=556)]) - graph.CallGraphOp @model.layers.17 (%1077:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=574)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %354:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=20)], %355:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=48)]) -> (%1118:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=604)], %1091:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=584)], %1093:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, 
quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=586)]) - graph.CallGraphOp @model.layers.18 (%1118:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=604)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %356:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=21)], %357:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=49)]) -> (%1159:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=634)], %1132:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=614)], %1134:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=616)]) - graph.CallGraphOp @model.layers.19 (%1159:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=634)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], 
%380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %358:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=22)], %359:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=50)]) -> (%1200:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=664)], %1173:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=644)], %1175:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=646)]) - graph.CallGraphOp @model.layers.20 (%1200:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=664)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %360:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=23)], %361:tensor<[1, 8, 
992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=51)]) -> (%1241:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=694)], %1214:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=674)], %1216:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=676)]) - graph.CallGraphOp @model.layers.21 (%1241:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=694)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %362:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=24)], %363:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=52)]) -> (%1282:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=724)], %1255:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, 
quant_to_type: Int8, scale_type: Float32), uuid=704)], %1257:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=706)]) - graph.CallGraphOp @model.layers.22 (%1282:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=724)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %364:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=25)], %365:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=53)]) -> (%1323:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=754)], %1296:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=734)], %1298:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=736)]) - graph.CallGraphOp @model.layers.23 (%1323:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=754)], %379:tensor<[1, 
32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %366:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=26)], %367:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=54)]) -> (%1364:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=784)], %1337:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=764)], %1339:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=766)]) - graph.CallGraphOp @model.layers.24 (%1364:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=784)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %368:tensor<[1, 8, 128, 992], 
Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=27)], %369:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=55)]) -> (%1405:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=814)], %1378:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=794)], %1380:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=796)]) - graph.CallGraphOp @model.layers.25 (%1405:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=814)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %370:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=28)], %371:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=56)]) -> (%1446:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: 
Int16, scale_type: Float32), uuid=844)], %1419:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=824)], %1421:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=826)]) - graph.CallGraphOp @model.layers.26 (%1446:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=844)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %372:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=29)], %373:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=57)]) -> (%1487:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=874)], %1460:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=854)], %1462:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=856)]) - graph.CallGraphOp @model.layers.27 (%1487:tensor<[1, 32, 2048], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=874)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %374:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=30)], %375:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=58)]) -> (%1528:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=904)], %1501:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=884)], %1503:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=886)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=904), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=905), )] (%1528:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=904)]) -> (%1529:tensor<[1, 32, 2048], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=905)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=905), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=907), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=906)), using_qnn:true] (%1529:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=905)]) -> (%1530:tensor<[1, 32, 151936], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=907)]) - cf.ReturnOp (%1530:tensor<[1, 32, 151936], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=907)], %394:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=74)], %435:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=104)], %476:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=134)], %517:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=164)], %558:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, 
quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=194)], %599:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=224)], %640:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=254)], %681:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=284)], %722:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=314)], %763:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=344)], %804:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=374)], %845:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=404)], %886:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=434)], %927:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=464)], %968:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=494)], %1009:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=524)], %1050:tensor<[1, 8, 128, 
32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=554)], %1091:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=584)], %1132:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=614)], %1173:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=644)], %1214:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=674)], %1255:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=704)], %1296:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=734)], %1337:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=764)], %1378:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=794)], %1419:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=824)], %1460:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=854)], %1501:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, 
quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=884)], %396:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=76)], %437:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=106)], %478:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=136)], %519:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=166)], %560:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=196)], %601:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=226)], %642:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=256)], %683:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=286)], %724:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=316)], %765:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=346)], %806:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=376)], %847:tensor<[1, 8, 32, 
128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=406)], %888:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=436)], %929:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=466)], %970:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=496)], %1011:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=526)], %1052:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=556)], %1093:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=586)], %1134:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=616)], %1175:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=646)], %1216:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=676)], %1257:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=706)], %1298:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, 
quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=736)], %1339:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=766)], %1380:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=796)], %1421:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=826)], %1462:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=856)], %1503:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=886)]) -> () - } - } - graph.SubGraphOp @model.layers.0 [using_qnn:true, symbol:model.layers.0] { - (%378:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=60)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %320:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=3)], %321:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), 
uuid=31)]) -> (%421:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=94)], %394:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=74)], %396:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=76)]) { - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=60), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), )] (%378:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=60)]) -> (%381:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) - graph.CallGraphOp @model.layers.0.self_attn (%381:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %320:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: 
Int8, scale_type: Float32), uuid=3)], %321:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=31)]) -> (%413:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=86)], %394:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=74)], %396:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=76)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=86), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=60), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=86), )] (%413:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=86)], %378:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=60)]) -> (%414:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=86)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=86), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=87), )] 
(%414:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=86)]) -> (%415:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=87)]) - graph.CallGraphOp @model.layers.0.mlp (%415:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=87)]) -> (%420:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=94)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=94), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=86), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=94), )] (%420:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=94)], %414:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=86)]) -> (%421:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=94)]) - cf.ReturnOp (%421:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=94)], %394:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 
-128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=74)], %396:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=76)]) -> () - } - } - graph.SubGraphOp @model.layers.0.self_attn [using_qnn:true, symbol:model.layers.0.self_attn] { - (%381:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %320:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=3)], %321:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=31)]) -> (%413:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=86)], %394:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=74)], %396:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=76)]) { - linalg.CPU.LinearOp (%381:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%382:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=70)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=67), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=66))] (%381:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%383:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=67)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=69), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=68))] (%381:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=65)]) -> (%384:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=69)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=70), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=70), )] 
(%382:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=70)]) -> (%382:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=70)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=70), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=70), )] (%382:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=70)]) -> (%385:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=70)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=67), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=67), )] (%383:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=67)]) -> (%383:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=67)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=67), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=67), )] (%383:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=67)]) -> (%386:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=67)]) - linalg.CPU.ViewOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=69), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=69), )] (%384:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=69)]) -> (%384:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=69)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=69), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=69), )] (%384:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=69)]) -> (%387:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=69)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=70), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=71), )] (%385:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=70)]) -> (%388:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=71)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: 
Float32), uuid=67), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=72), )] (%386:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=67)]) -> (%389:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=72)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=71), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=71), )] (%388:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=71)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%390:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=71)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=72), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=72), )] (%389:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=72)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%391:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=72)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=72), outputs_0:QuantSpec(Raw(type: Float16), uuid=73), )] (%391:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=72)]) -> (%392:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=73)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=73), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=74), )] (%392:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=73)]) -> (%393:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, 
quant_to_type: Int8, scale_type: Float32), uuid=74)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=74), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=74), )] (%393:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=74)]) -> (%394:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=74)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=69), outputs_0:QuantSpec(Raw(type: Float16), uuid=75), )] (%387:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=69)]) -> (%395:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=75)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=75), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=76), )] (%395:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=75)]) -> (%396:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=76)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=3), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=74), 
outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=3), )] (%320:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=3)], %394:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=74)]) -> (%397:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=3)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=31), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=76), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=31), )] (%321:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=31)], %396:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=76)]) -> (%398:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=31)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=3), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=3), )] (%397:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, 
quant_to_type: Int8, scale_type: Float32), uuid=3)]) -> (%399:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=3)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=31), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=31), )] (%398:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=31)]) -> (%400:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=31)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=71), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=3), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=77), )] (%390:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=71)], %399:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=3)]) -> (%401:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=77)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=77), inputs_1:QuantSpec(Raw(type: Float32), uuid=78), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=77), )] (%401:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=77)], %402:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=78), constant:[0.088388346]]) -> (%403:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=77)]) - linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=77), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=79), )] (%403:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=77)]) -> (%404:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=79)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=79), inputs_1:QuantSpec(Raw(type: Int16), uuid=80), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=79), )] (%404:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=79)], %405:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=80), constant:[-20]]) -> (%406:tensor<[1, 16, 32, 1], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=79)]) - linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=81), outputs_0:QuantSpec(Raw(type: UInt8), uuid=82), )] (%319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %407:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=81), constant:[0]]) -> (%408:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=82)]) - linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=82), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=77), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=79), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=79), )] (%408:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=82)], %403:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=77)], %406:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=79)]) -> (%409:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=79)]) - linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=79), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, 
scale_type: Float32), uuid=83), )] (%409:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=79)]) -> (%410:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=83)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=83), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=31), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=84), )] (%410:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=83)], %400:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=31)]) -> (%411:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=84)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=84), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=84), )] (%411:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=84)]) -> (%412:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, 
scale_type: Float32), uuid=84)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=84), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=84), )] (%412:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=84)]) -> (%412:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=84)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=84), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=86), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=85))] (%412:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=84)]) -> (%413:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=86)]) - cf.ReturnOp (%413:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=86)], %394:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=74)], %396:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 
127, quant_to_type: Int8, scale_type: Float32), uuid=76)]) -> () - } - } - graph.SubGraphOp @model.layers.0.mlp [using_qnn:true, symbol:model.layers.0.mlp] { - (%415:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=87)]) -> (%420:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=94)]) { - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=87), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=89), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=88))] (%415:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=87)]) -> (%416:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=89)]) - linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=89), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=90), )] (%416:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=89)]) -> (%417:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=90)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=87), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=92), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=91))] (%415:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=87)]) -> (%418:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=92)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=90), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=92), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=90), )] (%417:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=90)], %418:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=92)]) -> (%419:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=90)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=90), 
outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=94), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=93))] (%419:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=90)]) -> (%420:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=94)]) - cf.ReturnOp (%420:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=94)]) -> () - } - } - graph.SubGraphOp @model.layers.1 [using_qnn:true, symbol:model.layers.1] { - (%421:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=94)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %322:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=4)], %323:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=32)]) -> (%462:tensor<[1, 32, 2048], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=124)], %435:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=104)], %437:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=106)]) { - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=94), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=95), )] (%421:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=94)]) -> (%422:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=95)]) - graph.CallGraphOp @model.layers.1.self_attn (%422:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=95)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %322:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=4)], %323:tensor<[1, 
8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=32)]) -> (%454:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=116)], %435:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=104)], %437:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=106)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=116), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=94), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=116), )] (%454:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=116)], %421:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=94)]) -> (%455:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=116)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=116), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=117), )] (%455:tensor<[1, 32, 2048], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=116)]) -> (%456:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=117)]) - graph.CallGraphOp @model.layers.1.mlp (%456:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=117)]) -> (%461:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=124)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=124), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=116), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=124), )] (%461:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=124)], %455:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=116)]) -> (%462:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=124)]) - cf.ReturnOp (%462:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=124)], %435:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: 
Int8, scale_type: Float32), uuid=104)], %437:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=106)]) -> () - } - } - graph.SubGraphOp @model.layers.1.self_attn [using_qnn:true, symbol:model.layers.1.self_attn] { - (%422:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=95)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %322:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=4)], %323:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=32)]) -> (%454:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=116)], %435:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=104)], %437:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=106)]) { - linalg.CPU.LinearOp (%422:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: 
Float32), uuid=95)]) -> (%423:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=100)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=95), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=97), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=96))] (%422:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=95)]) -> (%424:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=97)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=95), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=99), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=98))] (%422:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=95)]) -> (%425:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=99)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=100), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=100), )] (%423:tensor<[1, 32, 2048], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=100)]) -> (%423:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=100)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=100), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=100), )] (%423:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=100)]) -> (%426:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=100)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=97), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=97), )] (%424:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=97)]) -> (%424:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=97)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=97), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=97), )] (%424:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=97)]) -> (%427:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=97)]) - linalg.CPU.ViewOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=99), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=99), )] (%425:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=99)]) -> (%425:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=99)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=99), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=99), )] (%425:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=99)]) -> (%428:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=99)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=100), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=101), )] (%426:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=100)]) -> (%429:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=101)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: 
Float32), uuid=97), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=102), )] (%427:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=97)]) -> (%430:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=102)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=101), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=101), )] (%429:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=101)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%431:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=101)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=102), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=102), )] (%430:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=102)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%432:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=102)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=102), outputs_0:QuantSpec(Raw(type: Float16), uuid=103), )] (%432:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=102)]) -> (%433:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=103)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=103), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=104), )] (%433:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=103)]) -> (%434:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, 
quant_to_type: Int8, scale_type: Float32), uuid=104)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=104), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=104), )] (%434:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=104)]) -> (%435:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=104)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=99), outputs_0:QuantSpec(Raw(type: Float16), uuid=105), )] (%428:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=99)]) -> (%436:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=105)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=105), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=106), )] (%436:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=105)]) -> (%437:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=106)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=4), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), 
uuid=104), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=4), )] (%322:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=4)], %435:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=104)]) -> (%438:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=4)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=32), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=106), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=32), )] (%323:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=32)], %437:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=106)]) -> (%439:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=32)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=4), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=4), )] (%438:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 
127, quant_to_type: Int8, scale_type: Float32), uuid=4)]) -> (%440:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=4)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=32), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=32), )] (%439:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=32)]) -> (%441:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=32)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=101), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=4), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=107), )] (%431:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=101)], %440:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=4)]) -> (%442:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=107)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: 
Float32), uuid=107), inputs_1:QuantSpec(Raw(type: Float32), uuid=108), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=107), )] (%442:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=107)], %443:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=108), constant:[0.088388346]]) -> (%444:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=107)]) - linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=107), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=109), )] (%444:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=107)]) -> (%445:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=109)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=109), inputs_1:QuantSpec(Raw(type: Int16), uuid=110), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=109), )] (%445:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=109)], %446:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=110), constant:[-20]]) -> (%447:tensor<[1, 16, 
32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=109)]) - linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=111), outputs_0:QuantSpec(Raw(type: UInt8), uuid=112), )] (%319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %448:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=111), constant:[0]]) -> (%449:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=112)]) - linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=112), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=107), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=109), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=109), )] (%449:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=112)], %444:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=107)], %447:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=109)]) -> (%450:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=109)]) - linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=109), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=113), )] (%450:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=109)]) -> (%451:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=113)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=113), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=32), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=114), )] (%451:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=113)], %441:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=32)]) -> (%452:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=114)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=114), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=114), )] (%452:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=114)]) -> (%453:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 
-32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=114)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=114), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=114), )] (%453:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=114)]) -> (%453:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=114)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=114), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=116), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=115))] (%453:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=114)]) -> (%454:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=116)]) - cf.ReturnOp (%454:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=116)], %435:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=104)], %437:tensor<[1, 8, 32, 128], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=106)]) -> () - } - } - graph.SubGraphOp @model.layers.1.mlp [using_qnn:true, symbol:model.layers.1.mlp] { - (%456:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=117)]) -> (%461:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=124)]) { - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=117), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=119), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=118))] (%456:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=117)]) -> (%457:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=119)]) - linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=119), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=120), )] (%457:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=119)]) -> (%458:tensor<[1, 32, 6144], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=120)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=117), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=122), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=121))] (%456:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=117)]) -> (%459:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=122)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=120), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=122), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=120), )] (%458:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=120)], %459:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=122)]) -> (%460:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=120)]) - linalg.CPU.LinearOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=120), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=124), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=123))] (%460:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=120)]) -> (%461:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=124)]) - cf.ReturnOp (%461:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=124)]) -> () - } - } - graph.SubGraphOp @model.layers.2 [using_qnn:true, symbol:model.layers.2] { - (%462:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=124)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %324:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=5)], %325:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, 
quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=33)]) -> (%503:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=154)], %476:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=134)], %478:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=136)]) { - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=124), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=125), )] (%462:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=124)]) -> (%463:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=125)]) - graph.CallGraphOp @model.layers.2.self_attn (%463:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=125)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %324:tensor<[1, 8, 128, 992], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=5)], %325:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=33)]) -> (%495:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=146)], %476:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=134)], %478:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=136)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=146), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=124), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=146), )] (%495:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=146)], %462:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=124)]) -> (%496:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=146)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=146), 
outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=147), )] (%496:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=146)]) -> (%497:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=147)]) - graph.CallGraphOp @model.layers.2.mlp (%497:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=147)]) -> (%502:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=154)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=154), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=146), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=154), )] (%502:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=154)], %496:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=146)]) -> (%503:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=154)]) - cf.ReturnOp (%503:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: 
Int16, scale_type: Float32), uuid=154)], %476:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=134)], %478:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=136)]) -> () - } - } - graph.SubGraphOp @model.layers.2.self_attn [using_qnn:true, symbol:model.layers.2.self_attn] { - (%463:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=125)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %324:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=5)], %325:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=33)]) -> (%495:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=146)], %476:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=134)], %478:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=136)]) { - 
linalg.CPU.LinearOp (%463:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=125)]) -> (%464:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=130)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=125), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=127), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=126))] (%463:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=125)]) -> (%465:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=127)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=125), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=129), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=128))] (%463:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=125)]) -> (%466:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=129)]) - linalg.CPU.ViewOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=130), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=130), )] (%464:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=130)]) -> (%464:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=130)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=130), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=130), )] (%464:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=130)]) -> (%467:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=130)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=127), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=127), )] (%465:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=127)]) -> (%465:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=127)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=127), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=127), )] (%465:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=127)]) -> (%468:tensor<[1, 8, 32, 128], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=127)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=129), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=129), )] (%466:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=129)]) -> (%466:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=129)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=129), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=129), )] (%466:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=129)]) -> (%469:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=129)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=130), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=131), )] (%467:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=130)]) -> (%470:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=131)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=127), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=132), )] (%468:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=127)]) -> (%471:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=132)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=131), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=131), )] (%470:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=131)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%472:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=131)]) - linalg.CPU.RoPEOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=132), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=132), )] (%471:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=132)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%473:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=132)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=132), outputs_0:QuantSpec(Raw(type: Float16), uuid=133), )] (%473:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=132)]) -> (%474:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=133)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=133), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=134), )] (%474:tensor<[1, 
8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=133)]) -> (%475:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=134)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=134), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=134), )] (%475:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=134)]) -> (%476:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=134)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=129), outputs_0:QuantSpec(Raw(type: Float16), uuid=135), )] (%469:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=129)]) -> (%477:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=135)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=135), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=136), )] (%477:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=135)]) -> (%478:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=136)]) - linalg.CPU.ConcatOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=5), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=134), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=5), )] (%324:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=5)], %476:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=134)]) -> (%479:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=5)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=33), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=136), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=33), )] (%325:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=33)], %478:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=136)]) -> (%480:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=33)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: 
Float32), uuid=5), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=5), )] (%479:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=5)]) -> (%481:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=5)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=33), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=33), )] (%480:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=33)]) -> (%482:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=33)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=131), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=5), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=137), )] (%472:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=131)], %481:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=5)]) -> (%483:tensor<[1, 16, 32, 1024], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=137)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=137), inputs_1:QuantSpec(Raw(type: Float32), uuid=138), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=137), )] (%483:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=137)], %484:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=138), constant:[0.088388346]]) -> (%485:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=137)]) - linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=137), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=139), )] (%485:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=137)]) -> (%486:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=139)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=139), inputs_1:QuantSpec(Raw(type: Int16), uuid=140), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=139), )] (%486:tensor<[1, 
16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=139)], %487:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=140), constant:[-20]]) -> (%488:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=139)]) - linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=141), outputs_0:QuantSpec(Raw(type: UInt8), uuid=142), )] (%319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %489:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=141), constant:[0]]) -> (%490:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=142)]) - linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=142), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=137), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=139), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=139), )] (%490:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=142)], %485:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=137)], %488:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=139)]) -> (%491:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=139)]) - linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=139), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=143), )] (%491:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=139)]) -> (%492:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=143)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=143), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=33), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=144), )] (%492:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=143)], %482:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=33)]) -> (%493:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=144)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=144), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=144), )] 
(%493:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=144)]) -> (%494:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=144)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=144), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=144), )] (%494:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=144)]) -> (%494:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=144)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=144), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=146), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=145))] (%494:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=144)]) -> (%495:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=146)]) - cf.ReturnOp (%495:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=146)], %476:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=134)], %478:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=136)]) -> () - } - } - graph.SubGraphOp @model.layers.2.mlp [using_qnn:true, symbol:model.layers.2.mlp] { - (%497:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=147)]) -> (%502:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=154)]) { - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=147), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=149), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=148))] (%497:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=147)]) -> (%498:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=149)]) - linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=149), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=150), )] 
(%498:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=149)]) -> (%499:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=150)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=147), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=152), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=151))] (%497:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=147)]) -> (%500:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=152)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=150), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=152), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=150), )] (%499:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=150)], %500:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=152)]) -> (%501:tensor<[1, 32, 
6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=150)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=150), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=154), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=153))] (%501:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=150)]) -> (%502:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=154)]) - cf.ReturnOp (%502:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=154)]) -> () - } - } - graph.SubGraphOp @model.layers.3 [using_qnn:true, symbol:model.layers.3] { - (%503:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=154)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %326:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, 
quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=6)], %327:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=34)]) -> (%544:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=184)], %517:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=164)], %519:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=166)]) { - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=154), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=155), )] (%503:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=154)]) -> (%504:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=155)]) - graph.CallGraphOp @model.layers.3.self_attn (%504:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=155)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %326:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=6)], %327:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=34)]) -> (%536:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=176)], %517:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=164)], %519:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=166)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=176), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=154), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=176), )] (%536:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=176)], %503:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=154)]) -> (%537:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=176)]) - linalg.CPU.RMSNormOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=176), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=177), )] (%537:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=176)]) -> (%538:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=177)]) - graph.CallGraphOp @model.layers.3.mlp (%538:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=177)]) -> (%543:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=184)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=184), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=176), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=184), )] (%543:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=184)], %537:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=176)]) -> (%544:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=184)]) - cf.ReturnOp (%544:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=184)], %517:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=164)], %519:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=166)]) -> () - } - } - graph.SubGraphOp @model.layers.3.self_attn [using_qnn:true, symbol:model.layers.3.self_attn] { - (%504:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=155)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %326:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=6)], %327:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=34)]) -> (%536:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=176)], %517:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=164)], 
%519:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=166)]) { - linalg.CPU.LinearOp (%504:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=155)]) -> (%505:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=160)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=155), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=157), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=156))] (%504:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=155)]) -> (%506:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=157)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=155), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=159), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=158))] (%504:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=155)]) -> (%507:tensor<[1, 32, 
1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=159)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=160), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=160), )] (%505:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=160)]) -> (%505:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=160)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=160), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=160), )] (%505:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=160)]) -> (%508:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=160)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=157), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=157), )] (%506:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=157)]) -> (%506:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=157)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=157), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=157), )] (%506:tensor<[1, 32, 8, 128], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=157)]) -> (%509:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=157)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=159), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=159), )] (%507:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=159)]) -> (%507:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=159)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=159), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=159), )] (%507:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=159)]) -> (%510:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=159)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=160), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=161), )] (%508:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), 
uuid=160)]) -> (%511:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=161)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=157), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=162), )] (%509:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=157)]) -> (%512:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=162)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=161), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=161), )] (%511:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=161)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%513:tensor<[1, 16, 32, 128], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=161)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=162), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=162), )] (%512:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=162)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%514:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=162)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=162), outputs_0:QuantSpec(Raw(type: Float16), uuid=163), )] (%514:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=162)]) -> (%515:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=163)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: 
Float16), uuid=163), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=164), )] (%515:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=163)]) -> (%516:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=164)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=164), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=164), )] (%516:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=164)]) -> (%517:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=164)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=159), outputs_0:QuantSpec(Raw(type: Float16), uuid=165), )] (%510:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=159)]) -> (%518:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=165)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=165), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=166), )] (%518:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=165)]) -> (%519:tensor<[1, 8, 32, 128], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=166)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=6), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=164), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=6), )] (%326:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=6)], %517:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=164)]) -> (%520:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=6)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=34), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=166), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=34), )] (%327:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=34)], %519:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=166)]) -> (%521:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=34)]) - 
linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=6), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=6), )] (%520:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=6)]) -> (%522:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=6)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=34), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=34), )] (%521:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=34)]) -> (%523:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=34)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=161), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=6), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=167), )] (%513:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=161)], %522:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, 
quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=6)]) -> (%524:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=167)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=167), inputs_1:QuantSpec(Raw(type: Float32), uuid=168), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=167), )] (%524:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=167)], %525:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=168), constant:[0.088388346]]) -> (%526:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=167)]) - linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=167), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=169), )] (%526:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=167)]) -> (%527:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=169)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=169), inputs_1:QuantSpec(Raw(type: Int16), uuid=170), 
outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=169), )] (%527:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=169)], %528:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=170), constant:[-20]]) -> (%529:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=169)]) - linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=171), outputs_0:QuantSpec(Raw(type: UInt8), uuid=172), )] (%319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %530:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=171), constant:[0]]) -> (%531:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=172)]) - linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=172), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=167), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=169), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=169), )] (%531:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=172)], %526:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=167)], %529:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=169)]) -> (%532:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=169)]) - linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=169), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=173), )] (%532:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=169)]) -> (%533:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=173)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=173), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=34), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=174), )] (%533:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=173)], %523:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=34)]) -> (%534:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=174)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=174), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=174), )] (%534:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=174)]) -> (%535:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=174)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=174), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=174), )] (%535:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=174)]) -> (%535:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=174)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=174), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=176), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=175))] (%535:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=174)]) -> (%536:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=176)]) 
- cf.ReturnOp (%536:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=176)], %517:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=164)], %519:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=166)]) -> () - } - } - graph.SubGraphOp @model.layers.3.mlp [using_qnn:true, symbol:model.layers.3.mlp] { - (%538:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=177)]) -> (%543:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=184)]) { - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=177), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=179), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=178))] (%538:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=177)]) -> (%539:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=179)]) - linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=179), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=180), )] (%539:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=179)]) -> (%540:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=180)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=177), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=182), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=181))] (%538:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=177)]) -> (%541:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=182)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=180), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=182), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=180), )] (%540:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=180)], %541:tensor<[1, 32, 6144], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=182)]) -> (%542:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=180)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=180), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=184), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=183))] (%542:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=180)]) -> (%543:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=184)]) - cf.ReturnOp (%543:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=184)]) -> () - } - } - graph.SubGraphOp @model.layers.4 [using_qnn:true, symbol:model.layers.4] { - (%544:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=184)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, 
CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %328:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=7)], %329:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=35)]) -> (%585:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=214)], %558:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=194)], %560:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=196)]) { - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=184), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=185), )] (%544:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=184)]) -> (%545:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=185)]) - graph.CallGraphOp @model.layers.4.self_attn (%545:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=185)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], 
%380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %328:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=7)], %329:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=35)]) -> (%577:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=206)], %558:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=194)], %560:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=196)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=206), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=184), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=206), )] (%577:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=206)], %544:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=184)]) -> (%578:tensor<[1, 32, 2048], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=206)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=206), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=207), )] (%578:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=206)]) -> (%579:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=207)]) - graph.CallGraphOp @model.layers.4.mlp (%579:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=207)]) -> (%584:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=214)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=214), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=206), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=214), )] (%584:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=214)], %578:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=206)]) -> (%585:tensor<[1, 
32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=214)]) - cf.ReturnOp (%585:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=214)], %558:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=194)], %560:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=196)]) -> () - } - } - graph.SubGraphOp @model.layers.4.self_attn [using_qnn:true, symbol:model.layers.4.self_attn] { - (%545:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=185)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %328:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=7)], %329:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=35)]) -> (%577:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=206)], %558:tensor<[1, 8, 128, 32], 
Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=194)], %560:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=196)]) { - linalg.CPU.LinearOp (%545:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=185)]) -> (%546:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=190)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=185), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=187), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=186))] (%545:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=185)]) -> (%547:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=187)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=185), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=189), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=188))] (%545:tensor<[1, 32, 2048], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=185)]) -> (%548:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=189)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=190), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=190), )] (%546:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=190)]) -> (%546:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=190)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=190), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=190), )] (%546:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=190)]) -> (%549:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=190)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=187), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=187), )] (%547:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=187)]) -> (%547:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=187)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=187), 
outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=187), )] (%547:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=187)]) -> (%550:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=187)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=189), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=189), )] (%548:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=189)]) -> (%548:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=189)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=189), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=189), )] (%548:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=189)]) -> (%551:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=189)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=190), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 
32767, quant_to_type: Int16, scale_type: Float32), uuid=191), )] (%549:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=190)]) -> (%552:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=191)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=187), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=192), )] (%550:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=187)]) -> (%553:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=192)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=191), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=191), )] (%552:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=191)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%554:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=191)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=192), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=192), )] (%553:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=192)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%555:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=192)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=192), outputs_0:QuantSpec(Raw(type: Float16), uuid=193), )] (%555:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=192)]) -> 
(%556:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=193)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=193), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=194), )] (%556:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=193)]) -> (%557:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=194)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=194), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=194), )] (%557:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=194)]) -> (%558:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=194)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=189), outputs_0:QuantSpec(Raw(type: Float16), uuid=195), )] (%551:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=189)]) -> (%559:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=195)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=195), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=196), )] 
(%559:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=195)]) -> (%560:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=196)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=7), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=194), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=7), )] (%328:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=7)], %558:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=194)]) -> (%561:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=7)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=35), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=196), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=35), )] (%329:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=35)], %560:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=196)]) -> (%562:tensor<[1, 8, 1024, 
128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=35)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=7), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=7), )] (%561:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=7)]) -> (%563:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=7)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=35), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=35), )] (%562:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=35)]) -> (%564:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=35)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=191), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=7), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=197), )] (%554:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: 
Int16, scale_type: Float32), uuid=191)], %563:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=7)]) -> (%565:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=197)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=197), inputs_1:QuantSpec(Raw(type: Float32), uuid=198), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=197), )] (%565:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=197)], %566:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=198), constant:[0.088388346]]) -> (%567:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=197)]) - linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=197), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=199), )] (%567:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=197)]) -> (%568:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=199)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=199), inputs_1:QuantSpec(Raw(type: Int16), uuid=200), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=199), )] (%568:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=199)], %569:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=200), constant:[-20]]) -> (%570:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=199)]) - linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=201), outputs_0:QuantSpec(Raw(type: UInt8), uuid=202), )] (%319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %571:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=201), constant:[0]]) -> (%572:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=202)]) - linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=202), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=197), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=199), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=199), )] (%572:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=202)], %567:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=197)], %570:tensor<[1, 16, 32, 1], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=199)]) -> (%573:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=199)]) - linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=199), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=203), )] (%573:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=199)]) -> (%574:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=203)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=203), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=35), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=204), )] (%574:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=203)], %564:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=35)]) -> (%575:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=204)]) - linalg.CPU.TransposeOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=204), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=204), )] (%575:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=204)]) -> (%576:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=204)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=204), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=204), )] (%576:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=204)]) -> (%576:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=204)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=204), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=206), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=205))] (%576:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=204)]) -> (%577:tensor<[1, 32, 2048], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=206)]) - cf.ReturnOp (%577:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=206)], %558:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=194)], %560:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=196)]) -> () - } - } - graph.SubGraphOp @model.layers.4.mlp [using_qnn:true, symbol:model.layers.4.mlp] { - (%579:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=207)]) -> (%584:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=214)]) { - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=207), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=209), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=208))] (%579:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=207)]) -> (%580:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=209)]) - 
linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=209), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=210), )] (%580:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=209)]) -> (%581:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=210)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=207), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=212), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=211))] (%579:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=207)]) -> (%582:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=212)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=210), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=212), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=210), )] (%581:tensor<[1, 32, 6144], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=210)], %582:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=212)]) -> (%583:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=210)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=210), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=214), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=213))] (%583:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=210)]) -> (%584:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=214)]) - cf.ReturnOp (%584:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=214)]) -> () - } - } - graph.SubGraphOp @model.layers.5 [using_qnn:true, symbol:model.layers.5] { - (%585:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=214)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %330:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=8)], %331:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=36)]) -> (%626:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=244)], %599:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=224)], %601:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=226)]) { - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=214), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=215), )] (%585:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=214)]) -> (%586:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=215)]) - graph.CallGraphOp @model.layers.5.self_attn (%586:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=215)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %330:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=8)], %331:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=36)]) -> (%618:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=236)], %599:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=224)], %601:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=226)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=236), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=214), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=236), )] (%618:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=236)], %585:tensor<[1, 32, 2048], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=214)]) -> (%619:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=236)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=236), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=237), )] (%619:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=236)]) -> (%620:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=237)]) - graph.CallGraphOp @model.layers.5.mlp (%620:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=237)]) -> (%625:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=244)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=244), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=236), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=244), )] (%625:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=244)], %619:tensor<[1, 
32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=236)]) -> (%626:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=244)]) - cf.ReturnOp (%626:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=244)], %599:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=224)], %601:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=226)]) -> () - } - } - graph.SubGraphOp @model.layers.5.self_attn [using_qnn:true, symbol:model.layers.5.self_attn] { - (%586:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=215)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %330:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=8)], %331:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=36)]) -> (%618:tensor<[1, 32, 2048], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=236)], %599:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=224)], %601:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=226)]) { - linalg.CPU.LinearOp (%586:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=215)]) -> (%587:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=220)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=215), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=217), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=216))] (%586:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=215)]) -> (%588:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=217)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=215), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=219), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 
7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=218))] (%586:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=215)]) -> (%589:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=219)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=220), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=220), )] (%587:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=220)]) -> (%587:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=220)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=220), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=220), )] (%587:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=220)]) -> (%590:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=220)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=217), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=217), )] (%588:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=217)]) -> (%588:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=217)]) - linalg.CPU.TransposeOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=217), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=217), )] (%588:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=217)]) -> (%591:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=217)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=219), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=219), )] (%589:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=219)]) -> (%589:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=219)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=219), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=219), )] (%589:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=219)]) -> (%592:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=219)]) - 
linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=220), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=221), )] (%590:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=220)]) -> (%593:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=221)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=217), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=222), )] (%591:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=217)]) -> (%594:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=222)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=221), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=221), )] (%593:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=221)], %379:tensor<[1, 32, 128], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%595:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=221)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=222), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=222), )] (%594:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=222)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%596:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=222)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=222), outputs_0:QuantSpec(Raw(type: Float16), uuid=223), )] 
(%596:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=222)]) -> (%597:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=223)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=223), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=224), )] (%597:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=223)]) -> (%598:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=224)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=224), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=224), )] (%598:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=224)]) -> (%599:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=224)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=219), outputs_0:QuantSpec(Raw(type: Float16), uuid=225), )] (%592:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=219)]) -> (%600:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=225)]) - linalg.CPU.CastTypeOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=225), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=226), )] (%600:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=225)]) -> (%601:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=226)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=8), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=224), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=8), )] (%330:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=8)], %599:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=224)]) -> (%602:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=8)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=36), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=226), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=36), )] (%331:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=36)], 
%601:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=226)]) -> (%603:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=36)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=8), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=8), )] (%602:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=8)]) -> (%604:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=8)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=36), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=36), )] (%603:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=36)]) -> (%605:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=36)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=221), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=8), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 
32767, quant_to_type: Int16, scale_type: Float32), uuid=227), )] (%595:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=221)], %604:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=8)]) -> (%606:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=227)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=227), inputs_1:QuantSpec(Raw(type: Float32), uuid=228), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=227), )] (%606:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=227)], %607:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=228), constant:[0.088388346]]) -> (%608:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=227)]) - linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=227), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=229), )] (%608:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=227)]) -> (%609:tensor<[1, 16, 32, 1], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=229)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=229), inputs_1:QuantSpec(Raw(type: Int16), uuid=230), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=229), )] (%609:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=229)], %610:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=230), constant:[-20]]) -> (%611:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=229)]) - linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=231), outputs_0:QuantSpec(Raw(type: UInt8), uuid=232), )] (%319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %612:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=231), constant:[0]]) -> (%613:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=232)]) - linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=232), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=227), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=229), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=229), )] (%613:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), 
uuid=232)], %608:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=227)], %611:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=229)]) -> (%614:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=229)]) - linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=229), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=233), )] (%614:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=229)]) -> (%615:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=233)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=233), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=36), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=234), )] (%615:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=233)], %605:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), 
uuid=36)]) -> (%616:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=234)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=234), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=234), )] (%616:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=234)]) -> (%617:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=234)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=234), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=234), )] (%617:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=234)]) -> (%617:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=234)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=234), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=236), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), 
uuid=235))] (%617:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=234)]) -> (%618:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=236)]) - cf.ReturnOp (%618:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=236)], %599:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=224)], %601:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=226)]) -> () - } - } - graph.SubGraphOp @model.layers.5.mlp [using_qnn:true, symbol:model.layers.5.mlp] { - (%620:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=237)]) -> (%625:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=244)]) { - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=237), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=239), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=238))] (%620:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: 
Float32), uuid=237)]) -> (%621:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=239)]) - linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=239), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=240), )] (%621:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=239)]) -> (%622:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=240)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=237), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=242), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=241))] (%620:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=237)]) -> (%623:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=242)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=240), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=242), 
outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=240), )] (%622:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=240)], %623:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=242)]) -> (%624:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=240)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=240), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=244), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=243))] (%624:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=240)]) -> (%625:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=244)]) - cf.ReturnOp (%625:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=244)]) -> () - } - } - graph.SubGraphOp @model.layers.6 [using_qnn:true, symbol:model.layers.6] { - (%626:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=244)], %379:tensor<[1, 32, 128], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %332:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=9)], %333:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=37)]) -> (%667:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=274)], %640:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=254)], %642:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=256)]) { - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=244), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=245), )] (%626:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=244)]) -> (%627:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=245)]) - graph.CallGraphOp 
@model.layers.6.self_attn (%627:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=245)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %332:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=9)], %333:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=37)]) -> (%659:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=266)], %640:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=254)], %642:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=256)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=266), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=244), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=266), )] (%659:tensor<[1, 32, 2048], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=266)], %626:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=244)]) -> (%660:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=266)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=266), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=267), )] (%660:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=266)]) -> (%661:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=267)]) - graph.CallGraphOp @model.layers.6.mlp (%661:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=267)]) -> (%666:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=274)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=274), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=266), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=274), )] (%666:tensor<[1, 
32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=274)], %660:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=266)]) -> (%667:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=274)]) - cf.ReturnOp (%667:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=274)], %640:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=254)], %642:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=256)]) -> () - } - } - graph.SubGraphOp @model.layers.6.self_attn [using_qnn:true, symbol:model.layers.6.self_attn] { - (%627:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=245)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %332:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=9)], %333:tensor<[1, 8, 992, 128], 
Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=37)]) -> (%659:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=266)], %640:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=254)], %642:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=256)]) { - linalg.CPU.LinearOp (%627:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=245)]) -> (%628:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=250)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=245), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=247), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=246))] (%627:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=245)]) -> (%629:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=247)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=245), 
outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=249), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=248))] (%627:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=245)]) -> (%630:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=249)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=250), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=250), )] (%628:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=250)]) -> (%628:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=250)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=250), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=250), )] (%628:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=250)]) -> (%631:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=250)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=247), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=247), )] (%629:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=247)]) -> (%629:tensor<[1, 32, 8, 128], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=247)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=247), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=247), )] (%629:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=247)]) -> (%632:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=247)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=249), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=249), )] (%630:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=249)]) -> (%630:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=249)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=249), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=249), )] (%630:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=249)]) -> (%633:tensor<[1, 8, 32, 128], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=249)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=250), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=251), )] (%631:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=250)]) -> (%634:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=251)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=247), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=252), )] (%632:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=247)]) -> (%635:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=252)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=251), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=251), )] (%634:tensor<[1, 16, 32, 128], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=251)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%636:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=251)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=252), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=252), )] (%635:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=252)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%637:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=252)]) - linalg.CPU.CastTypeOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=252), outputs_0:QuantSpec(Raw(type: Float16), uuid=253), )] (%637:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=252)]) -> (%638:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=253)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=253), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=254), )] (%638:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=253)]) -> (%639:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=254)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=254), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=254), )] (%639:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=254)]) -> (%640:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=254)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=249), outputs_0:QuantSpec(Raw(type: Float16), uuid=255), )] (%633:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=249)]) -> (%641:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=255)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=255), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=256), )] (%641:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=255)]) -> (%642:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=256)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=9), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=254), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=9), )] (%332:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=9)], %640:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=254)]) -> (%643:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=9)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=37), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=256), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), 
uuid=37), )] (%333:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=37)], %642:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=256)]) -> (%644:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=37)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=9), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=9), )] (%643:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=9)]) -> (%645:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=9)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=37), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=37), )] (%644:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=37)]) -> (%646:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=37)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=251), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=9), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=257), )] (%636:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=251)], %645:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=9)]) -> (%647:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=257)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=257), inputs_1:QuantSpec(Raw(type: Float32), uuid=258), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=257), )] (%647:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=257)], %648:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=258), constant:[0.088388346]]) -> (%649:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=257)]) - linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=257), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=259), )] (%649:tensor<[1, 16, 32, 1024], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=257)]) -> (%650:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=259)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=259), inputs_1:QuantSpec(Raw(type: Int16), uuid=260), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=259), )] (%650:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=259)], %651:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=260), constant:[-20]]) -> (%652:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=259)]) - linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=261), outputs_0:QuantSpec(Raw(type: UInt8), uuid=262), )] (%319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %653:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=261), constant:[0]]) -> (%654:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=262)]) - linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=262), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=257), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=259), 
outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=259), )] (%654:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=262)], %649:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=257)], %652:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=259)]) -> (%655:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=259)]) - linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=259), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=263), )] (%655:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=259)]) -> (%656:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=263)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=263), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=37), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=264), )] (%656:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=263)], %646:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=37)]) -> (%657:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=264)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=264), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=264), )] (%657:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=264)]) -> (%658:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=264)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=264), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=264), )] (%658:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=264)]) -> (%658:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=264)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=264), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=266), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=265))] (%658:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=264)]) -> (%659:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=266)]) - cf.ReturnOp (%659:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=266)], %640:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=254)], %642:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=256)]) -> () - } - } - graph.SubGraphOp @model.layers.6.mlp [using_qnn:true, symbol:model.layers.6.mlp] { - (%661:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=267)]) -> (%666:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=274)]) { - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=267), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=269), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 
4, quant_to_type: UInt4, scale_1_type: Float32), uuid=268))] (%661:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=267)]) -> (%662:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=269)]) - linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=269), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=270), )] (%662:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=269)]) -> (%663:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=270)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=267), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=272), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=271))] (%661:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=267)]) -> (%664:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=272)]) - linalg.CPU.MulOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=270), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=272), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=270), )] (%663:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=270)], %664:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=272)]) -> (%665:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=270)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=270), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=274), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=273))] (%665:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=270)]) -> (%666:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=274)]) - cf.ReturnOp (%666:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=274)]) -> () - } - } - graph.SubGraphOp 
@model.layers.7 [using_qnn:true, symbol:model.layers.7] { - (%667:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=274)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %334:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=10)], %335:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=38)]) -> (%708:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=304)], %681:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=284)], %683:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=286)]) { - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=274), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=275), )] (%667:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: 
Int16, scale_type: Float32), uuid=274)]) -> (%668:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=275)]) - graph.CallGraphOp @model.layers.7.self_attn (%668:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=275)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %334:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=10)], %335:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=38)]) -> (%700:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=296)], %681:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=284)], %683:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=286)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=296), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 
32767, quant_to_type: Int16, scale_type: Float32), uuid=274), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=296), )] (%700:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=296)], %667:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=274)]) -> (%701:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=296)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=296), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=297), )] (%701:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=296)]) -> (%702:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=297)]) - graph.CallGraphOp @model.layers.7.mlp (%702:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=297)]) -> (%707:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=304)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=304), 
inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=296), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=304), )] (%707:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=304)], %701:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=296)]) -> (%708:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=304)]) - cf.ReturnOp (%708:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=304)], %681:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=284)], %683:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=286)]) -> () - } - } - graph.SubGraphOp @model.layers.7.self_attn [using_qnn:true, symbol:model.layers.7.self_attn] { - (%668:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=275)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], 
UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %334:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=10)], %335:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=38)]) -> (%700:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=296)], %681:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=284)], %683:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=286)]) { - linalg.CPU.LinearOp (%668:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=275)]) -> (%669:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=280)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=275), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=277), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=276))] (%668:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=275)]) -> (%670:tensor<[1, 32, 1024], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=277)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=275), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=279), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=278))] (%668:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=275)]) -> (%671:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=279)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=280), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=280), )] (%669:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=280)]) -> (%669:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=280)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=280), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=280), )] (%669:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=280)]) -> (%672:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=280)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=277), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=277), )] (%670:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=277)]) -> (%670:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=277)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=277), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=277), )] (%670:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=277)]) -> (%673:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=277)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=279), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=279), )] (%671:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=279)]) -> (%671:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=279)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=279), outputs_0:QuantSpec(SymPerTensor(quant_min: 
-32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=279), )] (%671:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=279)]) -> (%674:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=279)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=280), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=281), )] (%672:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=280)]) -> (%675:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=281)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=277), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=282), )] (%673:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=277)]) -> (%676:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=282)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=281), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=281), )] (%675:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=281)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%677:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=281)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=282), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=282), )] (%676:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=282)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> 
(%678:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=282)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=282), outputs_0:QuantSpec(Raw(type: Float16), uuid=283), )] (%678:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=282)]) -> (%679:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=283)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=283), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=284), )] (%679:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=283)]) -> (%680:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=284)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=284), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=284), )] (%680:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=284)]) -> (%681:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=284)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, 
scale_type: Float32), uuid=279), outputs_0:QuantSpec(Raw(type: Float16), uuid=285), )] (%674:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=279)]) -> (%682:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=285)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=285), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=286), )] (%682:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=285)]) -> (%683:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=286)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=10), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=284), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=10), )] (%334:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=10)], %681:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=284)]) -> (%684:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=10)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=38), 
inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=286), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=38), )] (%335:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=38)], %683:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=286)]) -> (%685:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=38)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=10), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=10), )] (%684:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=10)]) -> (%686:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=10)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=38), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=38), )] (%685:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=38)]) -> (%687:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 
127, quant_to_type: Int8, scale_type: Float32), uuid=38)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=281), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=10), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=287), )] (%677:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=281)], %686:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=10)]) -> (%688:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=287)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=287), inputs_1:QuantSpec(Raw(type: Float32), uuid=288), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=287), )] (%688:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=287)], %689:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=288), constant:[0.088388346]]) -> (%690:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=287)]) - linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, 
scale_type: Float32), uuid=287), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=289), )] (%690:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=287)]) -> (%691:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=289)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=289), inputs_1:QuantSpec(Raw(type: Int16), uuid=290), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=289), )] (%691:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=289)], %692:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=290), constant:[-20]]) -> (%693:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=289)]) - linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=291), outputs_0:QuantSpec(Raw(type: UInt8), uuid=292), )] (%319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %694:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=291), constant:[0]]) -> (%695:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=292)]) - linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=292), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=287), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=289), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=289), )] (%695:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=292)], %690:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=287)], %693:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=289)]) -> (%696:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=289)]) - linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=289), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=293), )] (%696:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=289)]) -> (%697:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=293)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=293), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=38), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: 
Int16, scale_type: Float32), uuid=294), )] (%697:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=293)], %687:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=38)]) -> (%698:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=294)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=294), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=294), )] (%698:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=294)]) -> (%699:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=294)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=294), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=294), )] (%699:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=294)]) -> (%699:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=294)]) - linalg.CPU.LinearOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=294), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=296), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=295))] (%699:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=294)]) -> (%700:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=296)]) - cf.ReturnOp (%700:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=296)], %681:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=284)], %683:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=286)]) -> () - } - } - graph.SubGraphOp @model.layers.7.mlp [using_qnn:true, symbol:model.layers.7.mlp] { - (%702:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=297)]) -> (%707:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=304)]) { - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=297), 
outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=299), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=298))] (%702:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=297)]) -> (%703:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=299)]) - linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=299), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=300), )] (%703:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=299)]) -> (%704:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=300)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=297), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=302), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=301))] (%702:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=297)]) -> (%705:tensor<[1, 32, 6144], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=302)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=300), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=302), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=300), )] (%704:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=300)], %705:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=302)]) -> (%706:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=300)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=300), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=304), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=303))] (%706:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=300)]) -> (%707:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=304)]) - cf.ReturnOp (%707:tensor<[1, 32, 2048], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=304)]) -> () - } - } - graph.SubGraphOp @model.layers.8 [using_qnn:true, symbol:model.layers.8] { - (%708:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=304)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %336:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=11)], %337:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=39)]) -> (%749:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=334)], %722:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=314)], %724:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=316)]) { - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=304), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=305), )] (%708:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=304)]) -> (%709:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=305)]) - graph.CallGraphOp @model.layers.8.self_attn (%709:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=305)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %336:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=11)], %337:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=39)]) -> (%741:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=326)], %722:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=314)], %724:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=316)]) - linalg.CPU.AddOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=326), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=304), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=326), )] (%741:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=326)], %708:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=304)]) -> (%742:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=326)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=326), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=327), )] (%742:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=326)]) -> (%743:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=327)]) - graph.CallGraphOp @model.layers.8.mlp (%743:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=327)]) -> (%748:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=334)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=334), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=326), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=334), )] (%748:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=334)], %742:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=326)]) -> (%749:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=334)]) - cf.ReturnOp (%749:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=334)], %722:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=314)], %724:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=316)]) -> () - } - } - graph.SubGraphOp @model.layers.8.self_attn [using_qnn:true, symbol:model.layers.8.self_attn] { - (%709:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=305)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 
128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %336:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=11)], %337:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=39)]) -> (%741:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=326)], %722:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=314)], %724:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=316)]) { - linalg.CPU.LinearOp (%709:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=305)]) -> (%710:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=310)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=305), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=307), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=306))] (%709:tensor<[1, 32, 2048], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=305)]) -> (%711:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=307)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=305), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=309), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=308))] (%709:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=305)]) -> (%712:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=309)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=310), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=310), )] (%710:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=310)]) -> (%710:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=310)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=310), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=310), )] (%710:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=310)]) -> (%713:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=310)]) - linalg.CPU.ViewOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=307), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=307), )] (%711:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=307)]) -> (%711:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=307)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=307), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=307), )] (%711:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=307)]) -> (%714:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=307)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=309), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=309), )] (%712:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=309)]) -> (%712:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=309)]) - 
linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=309), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=309), )] (%712:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=309)]) -> (%715:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=309)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=310), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=311), )] (%713:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=310)]) -> (%716:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=311)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=307), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=312), )] (%714:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=307)]) -> (%717:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=312)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=311), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=311), )] (%716:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=311)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%718:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=311)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=312), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=312), )] (%717:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=312)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%719:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=312)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=312), outputs_0:QuantSpec(Raw(type: Float16), uuid=313), )] (%719:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=312)]) -> (%720:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=313)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=313), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=314), )] (%720:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=313)]) -> (%721:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=314)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=314), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=314), )] (%721:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=314)]) -> (%722:tensor<[1, 8, 128, 32], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=314)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=309), outputs_0:QuantSpec(Raw(type: Float16), uuid=315), )] (%715:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=309)]) -> (%723:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=315)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=315), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=316), )] (%723:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=315)]) -> (%724:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=316)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=11), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=314), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=11), )] (%336:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=11)], %722:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=314)]) -> (%725:tensor<[1, 8, 128, 1024], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=11)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=39), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=316), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=39), )] (%337:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=39)], %724:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=316)]) -> (%726:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=39)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=11), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=11), )] (%725:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=11)]) -> (%727:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=11)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=39), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=39), 
)] (%726:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=39)]) -> (%728:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=39)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=311), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=11), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=317), )] (%718:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=311)], %727:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=11)]) -> (%729:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=317)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=317), inputs_1:QuantSpec(Raw(type: Float32), uuid=318), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=317), )] (%729:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=317)], %730:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=318), constant:[0.088388346]]) -> (%731:tensor<[1, 16, 32, 1024], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=317)]) - linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=317), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=319), )] (%731:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=317)]) -> (%732:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=319)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=319), inputs_1:QuantSpec(Raw(type: Int16), uuid=320), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=319), )] (%732:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=319)], %733:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=320), constant:[-20]]) -> (%734:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=319)]) - linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=321), outputs_0:QuantSpec(Raw(type: UInt8), uuid=322), )] (%319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %735:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), 
uuid=321), constant:[1]]) -> (%736:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=322)]) - linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=322), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=317), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=319), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=319), )] (%736:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=322)], %731:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=317)], %734:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=319)]) -> (%737:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=319)]) - linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=319), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=323), )] (%737:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=319)]) -> (%738:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=323)]) - linalg.CPU.MatMulOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=323), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=39), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=324), )] (%738:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=323)], %728:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=39)]) -> (%739:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=324)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=324), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=324), )] (%739:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=324)]) -> (%740:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=324)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=324), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=324), )] (%740:tensor<[1, 32, 16, 128], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=324)]) -> (%740:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=324)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=324), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=326), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=325))] (%740:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=324)]) -> (%741:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=326)]) - cf.ReturnOp (%741:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=326)], %722:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=314)], %724:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=316)]) -> () - } - } - graph.SubGraphOp @model.layers.8.mlp [using_qnn:true, symbol:model.layers.8.mlp] { - (%743:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=327)]) -> (%748:tensor<[1, 32, 2048], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=334)]) { - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=327), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=329), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=328))] (%743:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=327)]) -> (%744:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=329)]) - linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=329), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=330), )] (%744:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=329)]) -> (%745:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=330)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=327), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=332), weight_weight:QuantSpec(LPBQ(quant_min: -8, 
quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=331))] (%743:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=327)]) -> (%746:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=332)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=330), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=332), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=330), )] (%745:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=330)], %746:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=332)]) -> (%747:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=330)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=330), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=334), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=333))] (%747:tensor<[1, 32, 6144], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=330)]) -> (%748:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=334)]) - cf.ReturnOp (%748:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=334)]) -> () - } - } - graph.SubGraphOp @model.layers.9 [using_qnn:true, symbol:model.layers.9] { - (%749:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=334)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %338:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=12)], %339:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=40)]) -> (%790:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=364)], %763:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=344)], %765:tensor<[1, 8, 32, 128], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=346)]) { - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=334), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=335), )] (%749:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=334)]) -> (%750:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=335)]) - graph.CallGraphOp @model.layers.9.self_attn (%750:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=335)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %338:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=12)], %339:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=40)]) -> (%782:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=356)], 
%763:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=344)], %765:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=346)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=356), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=334), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=356), )] (%782:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=356)], %749:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=334)]) -> (%783:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=356)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=356), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=357), )] (%783:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=356)]) -> (%784:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=357)]) - graph.CallGraphOp 
@model.layers.9.mlp (%784:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=357)]) -> (%789:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=364)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=364), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=356), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=364), )] (%789:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=364)], %783:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=356)]) -> (%790:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=364)]) - cf.ReturnOp (%790:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=364)], %763:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=344)], %765:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=346)]) -> () - } - } - graph.SubGraphOp @model.layers.9.self_attn [using_qnn:true, symbol:model.layers.9.self_attn] { - 
(%750:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=335)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %338:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=12)], %339:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=40)]) -> (%782:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=356)], %763:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=344)], %765:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=346)]) { - linalg.CPU.LinearOp (%750:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=335)]) -> (%751:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=340)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=335), 
outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=337), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=336))] (%750:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=335)]) -> (%752:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=337)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=335), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=339), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=338))] (%750:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=335)]) -> (%753:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=339)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=340), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=340), )] (%751:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=340)]) -> (%751:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=340)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=340), 
outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=340), )] (%751:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=340)]) -> (%754:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=340)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=337), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=337), )] (%752:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=337)]) -> (%752:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=337)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=337), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=337), )] (%752:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=337)]) -> (%755:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=337)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=339), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=339), )] (%753:tensor<[1, 32, 1024], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=339)]) -> (%753:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=339)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=339), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=339), )] (%753:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=339)]) -> (%756:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=339)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=340), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=341), )] (%754:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=340)]) -> (%757:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=341)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=337), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=342), )] (%755:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=337)]) -> (%758:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=342)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=341), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=341), )] (%757:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=341)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%759:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=341)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=342), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=342), )] (%758:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=342)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%760:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=342)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=342), outputs_0:QuantSpec(Raw(type: Float16), uuid=343), )] (%760:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=342)]) -> (%761:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=343)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=343), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=344), )] (%761:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=343)]) -> (%762:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=344)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=344), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: 
Int8, scale_type: Float32), uuid=344), )] (%762:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=344)]) -> (%763:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=344)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=339), outputs_0:QuantSpec(Raw(type: Float16), uuid=345), )] (%756:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=339)]) -> (%764:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=345)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=345), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=346), )] (%764:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=345)]) -> (%765:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=346)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=12), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=344), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=12), )] (%338:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=12)], 
%763:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=344)]) -> (%766:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=12)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=40), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=346), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=40), )] (%339:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=40)], %765:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=346)]) -> (%767:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=40)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=12), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=12), )] (%766:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=12)]) -> (%768:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=12)]) - linalg.CPU.RepeatOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=40), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=40), )] (%767:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=40)]) -> (%769:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=40)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=341), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=12), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=347), )] (%759:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=341)], %768:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=12)]) -> (%770:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=347)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=347), inputs_1:QuantSpec(Raw(type: Float32), uuid=348), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=347), )] (%770:tensor<[1, 16, 32, 1024], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=347)], %771:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=348), constant:[0.088388346]]) -> (%772:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=347)]) - linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=347), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=349), )] (%772:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=347)]) -> (%773:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=349)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=349), inputs_1:QuantSpec(Raw(type: Int16), uuid=350), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=349), )] (%773:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=349)], %774:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=350), constant:[-20]]) -> (%775:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=349)]) - linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), 
uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=351), outputs_0:QuantSpec(Raw(type: UInt8), uuid=352), )] (%319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %776:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=351), constant:[-0.1796875]]) -> (%777:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=352)]) - linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=352), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=347), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=349), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=349), )] (%777:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=352)], %772:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=347)], %775:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=349)]) -> (%778:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=349)]) - linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=349), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=353), )] (%778:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: 
Float32), uuid=349)]) -> (%779:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=353)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=353), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=40), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=354), )] (%779:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=353)], %769:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=40)]) -> (%780:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=354)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=354), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=354), )] (%780:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=354)]) -> (%781:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=354)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: 
Float32), uuid=354), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=354), )] (%781:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=354)]) -> (%781:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=354)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=354), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=356), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=355))] (%781:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=354)]) -> (%782:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=356)]) - cf.ReturnOp (%782:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=356)], %763:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=344)], %765:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=346)]) -> () - } - } - graph.SubGraphOp @model.layers.9.mlp [using_qnn:true, symbol:model.layers.9.mlp] { - (%784:tensor<[1, 
32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=357)]) -> (%789:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=364)]) { - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=357), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=359), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=358))] (%784:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=357)]) -> (%785:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=359)]) - linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=359), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=360), )] (%785:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=359)]) -> (%786:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=360)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: 
Float32), uuid=357), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=362), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=361))] (%784:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=357)]) -> (%787:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=362)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=360), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=362), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=360), )] (%786:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=360)], %787:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=362)]) -> (%788:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=360)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=360), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=364), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, 
block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=363))] (%788:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=360)]) -> (%789:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=364)]) - cf.ReturnOp (%789:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=364)]) -> () - } - } - graph.SubGraphOp @model.layers.10 [using_qnn:true, symbol:model.layers.10] { - (%790:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=364)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %340:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=13)], %341:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=41)]) -> (%831:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=394)], %804:tensor<[1, 8, 128, 32], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=374)], %806:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=376)]) { - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=364), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=365), )] (%790:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=364)]) -> (%791:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=365)]) - graph.CallGraphOp @model.layers.10.self_attn (%791:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=365)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %340:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=13)], %341:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=41)]) -> 
(%823:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=386)], %804:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=374)], %806:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=376)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=386), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=364), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=386), )] (%823:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=386)], %790:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=364)]) -> (%824:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=386)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=386), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=387), )] (%824:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=386)]) -> (%825:tensor<[1, 32, 2048], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=387)]) - graph.CallGraphOp @model.layers.10.mlp (%825:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=387)]) -> (%830:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=394)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=394), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=386), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=394), )] (%830:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=394)], %824:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=386)]) -> (%831:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=394)]) - cf.ReturnOp (%831:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=394)], %804:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=374)], %806:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, 
quant_to_type: Int8, scale_type: Float32), uuid=376)]) -> () - } - } - graph.SubGraphOp @model.layers.10.self_attn [using_qnn:true, symbol:model.layers.10.self_attn] { - (%791:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=365)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %340:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=13)], %341:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=41)]) -> (%823:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=386)], %804:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=374)], %806:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=376)]) { - linalg.CPU.LinearOp (%791:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=365)]) -> (%792:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=370)]) - 
linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=365), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=367), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=366))] (%791:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=365)]) -> (%793:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=367)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=365), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=369), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=368))] (%791:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=365)]) -> (%794:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=369)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=370), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=370), )] (%792:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=370)]) -> (%792:tensor<[1, 32, 16, 128], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=370)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=370), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=370), )] (%792:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=370)]) -> (%795:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=370)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=367), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=367), )] (%793:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=367)]) -> (%793:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=367)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=367), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=367), )] (%793:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=367)]) -> (%796:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=367)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=369), 
outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=369), )] (%794:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=369)]) -> (%794:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=369)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=369), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=369), )] (%794:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=369)]) -> (%797:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=369)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=370), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=371), )] (%795:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=370)]) -> (%798:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=371)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=367), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=372), )] 
(%796:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=367)]) -> (%799:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=372)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=371), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=371), )] (%798:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=371)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%800:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=371)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=372), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=372), )] (%799:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=372)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%801:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=372)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=372), outputs_0:QuantSpec(Raw(type: Float16), uuid=373), )] (%801:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=372)]) -> (%802:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=373)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=373), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=374), )] (%802:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=373)]) -> (%803:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=374)]) - linalg.CPU.TransposeOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=374), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=374), )] (%803:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=374)]) -> (%804:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=374)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=369), outputs_0:QuantSpec(Raw(type: Float16), uuid=375), )] (%797:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=369)]) -> (%805:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=375)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=375), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=376), )] (%805:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=375)]) -> (%806:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=376)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=13), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=374), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, 
quant_to_type: Int8, scale_type: Float32), uuid=13), )] (%340:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=13)], %804:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=374)]) -> (%807:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=13)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=41), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=376), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=41), )] (%341:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=41)], %806:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=376)]) -> (%808:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=41)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=13), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=13), )] (%807:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=13)]) -> 
(%809:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=13)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=41), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=41), )] (%808:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=41)]) -> (%810:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=41)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=371), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=13), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=377), )] (%800:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=371)], %809:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=13)]) -> (%811:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=377)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=377), inputs_1:QuantSpec(Raw(type: Float32), 
uuid=378), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=377), )] (%811:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=377)], %812:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=378), constant:[0.088388346]]) -> (%813:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=377)]) - linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=377), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=379), )] (%813:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=377)]) -> (%814:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=379)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=379), inputs_1:QuantSpec(Raw(type: Int16), uuid=380), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=379), )] (%814:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=379)], %815:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=380), constant:[-20]]) -> (%816:tensor<[1, 16, 32, 1], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=379)]) - linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=381), outputs_0:QuantSpec(Raw(type: UInt8), uuid=382), )] (%319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %817:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=381), constant:[-0.93359375]]) -> (%818:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=382)]) - linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=382), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=377), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=379), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=379), )] (%818:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=382)], %813:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=377)], %816:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=379)]) -> (%819:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=379)]) - linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=379), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=383), )] (%819:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=379)]) -> (%820:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=383)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=383), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=41), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=384), )] (%820:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=383)], %810:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=41)]) -> (%821:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=384)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=384), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=384), )] (%821:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=384)]) -> (%822:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 
32767, quant_to_type: Int16, scale_type: Float32), uuid=384)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=384), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=384), )] (%822:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=384)]) -> (%822:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=384)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=384), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=386), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=385))] (%822:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=384)]) -> (%823:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=386)]) - cf.ReturnOp (%823:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=386)], %804:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=374)], %806:tensor<[1, 8, 32, 128], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=376)]) -> () - } - } - graph.SubGraphOp @model.layers.10.mlp [using_qnn:true, symbol:model.layers.10.mlp] { - (%825:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=387)]) -> (%830:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=394)]) { - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=387), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=389), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=388))] (%825:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=387)]) -> (%826:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=389)]) - linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=389), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=390), )] (%826:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=389)]) -> (%827:tensor<[1, 32, 6144], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=390)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=387), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=392), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=391))] (%825:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=387)]) -> (%828:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=392)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=390), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=392), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=390), )] (%827:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=390)], %828:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=392)]) -> (%829:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=390)]) - linalg.CPU.LinearOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=390), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=394), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=393))] (%829:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=390)]) -> (%830:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=394)]) - cf.ReturnOp (%830:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=394)]) -> () - } - } - graph.SubGraphOp @model.layers.11 [using_qnn:true, symbol:model.layers.11] { - (%831:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=394)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %342:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=14)], %343:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, 
quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=42)]) -> (%872:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=424)], %845:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=404)], %847:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=406)]) { - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=394), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=395), )] (%831:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=394)]) -> (%832:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=395)]) - graph.CallGraphOp @model.layers.11.self_attn (%832:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=395)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %342:tensor<[1, 8, 128, 992], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=14)], %343:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=42)]) -> (%864:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=416)], %845:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=404)], %847:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=406)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=416), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=394), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=416), )] (%864:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=416)], %831:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=394)]) -> (%865:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=416)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=416), 
outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=417), )] (%865:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=416)]) -> (%866:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=417)]) - graph.CallGraphOp @model.layers.11.mlp (%866:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=417)]) -> (%871:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=424)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=424), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=416), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=424), )] (%871:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=424)], %865:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=416)]) -> (%872:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=424)]) - cf.ReturnOp (%872:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: 
Int16, scale_type: Float32), uuid=424)], %845:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=404)], %847:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=406)]) -> () - } - } - graph.SubGraphOp @model.layers.11.self_attn [using_qnn:true, symbol:model.layers.11.self_attn] { - (%832:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=395)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %342:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=14)], %343:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=42)]) -> (%864:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=416)], %845:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=404)], %847:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=406)]) { 
- linalg.CPU.LinearOp (%832:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=395)]) -> (%833:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=400)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=395), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=397), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=396))] (%832:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=395)]) -> (%834:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=397)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=395), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=399), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=398))] (%832:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=395)]) -> (%835:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=399)]) - linalg.CPU.ViewOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=400), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=400), )] (%833:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=400)]) -> (%833:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=400)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=400), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=400), )] (%833:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=400)]) -> (%836:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=400)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=397), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=397), )] (%834:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=397)]) -> (%834:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=397)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=397), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=397), )] (%834:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=397)]) -> (%837:tensor<[1, 8, 32, 128], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=397)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=399), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=399), )] (%835:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=399)]) -> (%835:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=399)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=399), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=399), )] (%835:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=399)]) -> (%838:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=399)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=400), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=401), )] (%836:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=400)]) -> (%839:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=401)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=397), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=402), )] (%837:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=397)]) -> (%840:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=402)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=401), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=401), )] (%839:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=401)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%841:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=401)]) - linalg.CPU.RoPEOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=402), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=402), )] (%840:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=402)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%842:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=402)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=402), outputs_0:QuantSpec(Raw(type: Float16), uuid=403), )] (%842:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=402)]) -> (%843:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=403)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=403), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=404), )] (%843:tensor<[1, 
8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=403)]) -> (%844:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=404)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=404), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=404), )] (%844:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=404)]) -> (%845:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=404)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=399), outputs_0:QuantSpec(Raw(type: Float16), uuid=405), )] (%838:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=399)]) -> (%846:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=405)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=405), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=406), )] (%846:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=405)]) -> (%847:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=406)]) - linalg.CPU.ConcatOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=14), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=404), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=14), )] (%342:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=14)], %845:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=404)]) -> (%848:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=14)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=42), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=406), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=42), )] (%343:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=42)], %847:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=406)]) -> (%849:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=42)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: 
Float32), uuid=14), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=14), )] (%848:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=14)]) -> (%850:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=14)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=42), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=42), )] (%849:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=42)]) -> (%851:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=42)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=401), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=14), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=407), )] (%841:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=401)], %850:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=14)]) -> (%852:tensor<[1, 16, 32, 1024], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=407)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=407), inputs_1:QuantSpec(Raw(type: Float32), uuid=408), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=407), )] (%852:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=407)], %853:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=408), constant:[0.088388346]]) -> (%854:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=407)]) - linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=407), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=409), )] (%854:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=407)]) -> (%855:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=409)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=409), inputs_1:QuantSpec(Raw(type: Int16), uuid=410), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=409), )] (%855:tensor<[1, 
16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=409)], %856:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=410), constant:[-20]]) -> (%857:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=409)]) - linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=411), outputs_0:QuantSpec(Raw(type: UInt8), uuid=412), )] (%319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %858:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=411), constant:[0.515625]]) -> (%859:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=412)]) - linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=412), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=407), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=409), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=409), )] (%859:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=412)], %854:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=407)], %857:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=409)]) -> (%860:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=409)]) - linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=409), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=413), )] (%860:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=409)]) -> (%861:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=413)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=413), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=42), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=414), )] (%861:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=413)], %851:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=42)]) -> (%862:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=414)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=414), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=414), )] 
(%862:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=414)]) -> (%863:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=414)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=414), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=414), )] (%863:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=414)]) -> (%863:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=414)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=414), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=416), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=415))] (%863:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=414)]) -> (%864:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=416)]) - cf.ReturnOp (%864:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=416)], %845:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=404)], %847:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=406)]) -> () - } - } - graph.SubGraphOp @model.layers.11.mlp [using_qnn:true, symbol:model.layers.11.mlp] { - (%866:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=417)]) -> (%871:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=424)]) { - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=417), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=419), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=418))] (%866:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=417)]) -> (%867:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=419)]) - linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=419), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=420), )] 
(%867:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=419)]) -> (%868:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=420)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=417), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=422), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=421))] (%866:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=417)]) -> (%869:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=422)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=420), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=422), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=420), )] (%868:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=420)], %869:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=422)]) -> (%870:tensor<[1, 32, 
6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=420)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=420), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=424), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=423))] (%870:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=420)]) -> (%871:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=424)]) - cf.ReturnOp (%871:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=424)]) -> () - } - } - graph.SubGraphOp @model.layers.12 [using_qnn:true, symbol:model.layers.12] { - (%872:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=424)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %344:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 
-128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=15)], %345:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=43)]) -> (%913:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=454)], %886:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=434)], %888:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=436)]) { - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=424), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=425), )] (%872:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=424)]) -> (%873:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=425)]) - graph.CallGraphOp @model.layers.12.self_attn (%873:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=425)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: 
Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %344:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=15)], %345:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=43)]) -> (%905:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=446)], %886:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=434)], %888:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=436)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=446), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=424), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=446), )] (%905:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=446)], %872:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=424)]) -> (%906:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=446)]) - linalg.CPU.RMSNormOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=446), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=447), )] (%906:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=446)]) -> (%907:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=447)]) - graph.CallGraphOp @model.layers.12.mlp (%907:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=447)]) -> (%912:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=454)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=454), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=446), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=454), )] (%912:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=454)], %906:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=446)]) -> (%913:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=454)]) - cf.ReturnOp (%913:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=454)], %886:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=434)], %888:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=436)]) -> () - } - } - graph.SubGraphOp @model.layers.12.self_attn [using_qnn:true, symbol:model.layers.12.self_attn] { - (%873:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=425)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %344:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=15)], %345:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=43)]) -> (%905:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=446)], %886:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=434)], 
%888:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=436)]) { - linalg.CPU.LinearOp (%873:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=425)]) -> (%874:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=430)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=425), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=427), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=426))] (%873:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=425)]) -> (%875:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=427)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=425), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=429), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=428))] (%873:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=425)]) -> (%876:tensor<[1, 32, 
1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=429)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=430), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=430), )] (%874:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=430)]) -> (%874:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=430)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=430), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=430), )] (%874:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=430)]) -> (%877:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=430)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=427), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=427), )] (%875:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=427)]) -> (%875:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=427)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=427), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=427), )] (%875:tensor<[1, 32, 8, 128], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=427)]) -> (%878:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=427)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=429), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=429), )] (%876:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=429)]) -> (%876:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=429)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=429), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=429), )] (%876:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=429)]) -> (%879:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=429)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=430), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=431), )] (%877:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), 
uuid=430)]) -> (%880:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=431)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=427), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=432), )] (%878:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=427)]) -> (%881:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=432)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=431), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=431), )] (%880:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=431)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%882:tensor<[1, 16, 32, 128], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=431)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=432), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=432), )] (%881:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=432)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%883:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=432)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=432), outputs_0:QuantSpec(Raw(type: Float16), uuid=433), )] (%883:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=432)]) -> (%884:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=433)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: 
Float16), uuid=433), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=434), )] (%884:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=433)]) -> (%885:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=434)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=434), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=434), )] (%885:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=434)]) -> (%886:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=434)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=429), outputs_0:QuantSpec(Raw(type: Float16), uuid=435), )] (%879:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=429)]) -> (%887:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=435)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=435), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=436), )] (%887:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=435)]) -> (%888:tensor<[1, 8, 32, 128], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=436)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=15), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=434), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=15), )] (%344:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=15)], %886:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=434)]) -> (%889:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=15)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=43), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=436), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=43), )] (%345:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=43)], %888:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=436)]) -> (%890:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), 
uuid=43)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=15), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=15), )] (%889:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=15)]) -> (%891:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=15)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=43), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=43), )] (%890:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=43)]) -> (%892:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=43)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=431), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=15), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=437), )] (%882:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=431)], %891:tensor<[1, 16, 128, 1024], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=15)]) -> (%893:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=437)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=437), inputs_1:QuantSpec(Raw(type: Float32), uuid=438), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=437), )] (%893:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=437)], %894:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=438), constant:[0.088388346]]) -> (%895:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=437)]) - linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=437), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=439), )] (%895:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=437)]) -> (%896:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=439)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=439), 
inputs_1:QuantSpec(Raw(type: Int16), uuid=440), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=439), )] (%896:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=439)], %897:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=440), constant:[-20]]) -> (%898:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=439)]) - linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=441), outputs_0:QuantSpec(Raw(type: UInt8), uuid=442), )] (%319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %899:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=441), constant:[0.74609375]]) -> (%900:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=442)]) - linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=442), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=437), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=439), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=439), )] (%900:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=442)], %895:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=437)], %898:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=439)]) -> (%901:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=439)]) - linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=439), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=443), )] (%901:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=439)]) -> (%902:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=443)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=443), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=43), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=444), )] (%902:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=443)], %892:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=43)]) -> (%903:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=444)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=444), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=444), )] (%903:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=444)]) -> (%904:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=444)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=444), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=444), )] (%904:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=444)]) -> (%904:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=444)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=444), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=446), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=445))] (%904:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=444)]) -> (%905:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 
32767, quant_to_type: Int16, scale_type: Float32), uuid=446)]) - cf.ReturnOp (%905:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=446)], %886:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=434)], %888:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=436)]) -> () - } - } - graph.SubGraphOp @model.layers.12.mlp [using_qnn:true, symbol:model.layers.12.mlp] { - (%907:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=447)]) -> (%912:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=454)]) { - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=447), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=449), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=448))] (%907:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=447)]) -> (%908:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=449)]) - linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=449), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=450), )] (%908:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=449)]) -> (%909:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=450)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=447), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=452), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=451))] (%907:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=447)]) -> (%910:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=452)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=450), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=452), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=450), )] (%909:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=450)], 
%910:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=452)]) -> (%911:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=450)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=450), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=454), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=453))] (%911:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=450)]) -> (%912:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=454)]) - cf.ReturnOp (%912:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=454)]) -> () - } - } - graph.SubGraphOp @model.layers.13 [using_qnn:true, symbol:model.layers.13] { - (%913:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=454)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %346:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=16)], %347:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=44)]) -> (%954:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=484)], %927:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=464)], %929:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=466)]) { - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=454), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=455), )] (%913:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=454)]) -> (%914:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=455)]) - graph.CallGraphOp @model.layers.13.self_attn (%914:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=455)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %346:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=16)], %347:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=44)]) -> (%946:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=476)], %927:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=464)], %929:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=466)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=476), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=454), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=476), )] (%946:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=476)], %913:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=454)]) -> (%947:tensor<[1, 
32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=476)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=476), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=477), )] (%947:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=476)]) -> (%948:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=477)]) - graph.CallGraphOp @model.layers.13.mlp (%948:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=477)]) -> (%953:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=484)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=484), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=476), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=484), )] (%953:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=484)], %947:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=476)]) -> (%954:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=484)]) - cf.ReturnOp (%954:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=484)], %927:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=464)], %929:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=466)]) -> () - } - } - graph.SubGraphOp @model.layers.13.self_attn [using_qnn:true, symbol:model.layers.13.self_attn] { - (%914:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=455)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %346:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=16)], %347:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=44)]) -> (%946:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=476)], 
%927:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=464)], %929:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=466)]) { - linalg.CPU.LinearOp (%914:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=455)]) -> (%915:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=460)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=455), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=457), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=456))] (%914:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=455)]) -> (%916:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=457)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=455), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=459), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=458))] (%914:tensor<[1, 32, 2048], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=455)]) -> (%917:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=459)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=460), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=460), )] (%915:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=460)]) -> (%915:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=460)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=460), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=460), )] (%915:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=460)]) -> (%918:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=460)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=457), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=457), )] (%916:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=457)]) -> (%916:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=457)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=457), 
outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=457), )] (%916:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=457)]) -> (%919:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=457)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=459), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=459), )] (%917:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=459)]) -> (%917:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=459)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=459), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=459), )] (%917:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=459)]) -> (%920:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=459)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=460), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 
32767, quant_to_type: Int16, scale_type: Float32), uuid=461), )] (%918:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=460)]) -> (%921:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=461)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=457), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=462), )] (%919:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=457)]) -> (%922:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=462)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=461), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=461), )] (%921:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=461)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%923:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=461)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=462), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=462), )] (%922:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=462)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%924:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=462)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=462), outputs_0:QuantSpec(Raw(type: Float16), uuid=463), )] (%924:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=462)]) -> 
(%925:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=463)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=463), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=464), )] (%925:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=463)]) -> (%926:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=464)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=464), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=464), )] (%926:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=464)]) -> (%927:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=464)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=459), outputs_0:QuantSpec(Raw(type: Float16), uuid=465), )] (%920:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=459)]) -> (%928:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=465)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=465), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=466), )] 
(%928:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=465)]) -> (%929:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=466)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=16), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=464), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=16), )] (%346:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=16)], %927:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=464)]) -> (%930:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=16)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=44), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=466), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=44), )] (%347:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=44)], %929:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=466)]) -> (%931:tensor<[1, 8, 1024, 
128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=44)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=16), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=16), )] (%930:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=16)]) -> (%932:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=16)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=44), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=44), )] (%931:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=44)]) -> (%933:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=44)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=461), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=16), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=467), )] (%923:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=461)], %932:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=16)]) -> (%934:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=467)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=467), inputs_1:QuantSpec(Raw(type: Float32), uuid=468), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=467), )] (%934:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=467)], %935:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=468), constant:[0.088388346]]) -> (%936:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=467)]) - linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=467), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=469), )] (%936:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=467)]) -> (%937:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=469)]) - linalg.CPU.AddOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=469), inputs_1:QuantSpec(Raw(type: Int16), uuid=470), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=469), )] (%937:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=469)], %938:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=470), constant:[-20]]) -> (%939:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=469)]) - linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=471), outputs_0:QuantSpec(Raw(type: UInt8), uuid=472), )] (%319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %940:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=471), constant:[-0.78515625]]) -> (%941:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=472)]) - linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=472), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=467), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=469), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=469), )] (%941:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=472)], %936:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=467)], %939:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=469)]) -> (%942:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=469)]) - linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=469), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=473), )] (%942:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=469)]) -> (%943:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=473)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=473), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=44), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=474), )] (%943:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=473)], %933:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=44)]) -> (%944:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=474)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=474), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=474), )] (%944:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=474)]) -> (%945:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=474)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=474), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=474), )] (%945:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=474)]) -> (%945:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=474)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=474), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=476), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=475))] (%945:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=474)]) -> (%946:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=476)]) - cf.ReturnOp (%946:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=476)], %927:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=464)], %929:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=466)]) -> () - } - } - graph.SubGraphOp @model.layers.13.mlp [using_qnn:true, symbol:model.layers.13.mlp] { - (%948:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=477)]) -> (%953:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=484)]) { - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=477), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=479), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=478))] (%948:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=477)]) -> (%949:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=479)]) - linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=479), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=480), )] (%949:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=479)]) -> (%950:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=480)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=477), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=482), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=481))] (%948:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=477)]) -> (%951:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=482)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=480), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=482), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=480), )] 
(%950:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=480)], %951:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=482)]) -> (%952:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=480)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=480), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=484), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=483))] (%952:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=480)]) -> (%953:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=484)]) - cf.ReturnOp (%953:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=484)]) -> () - } - } - graph.SubGraphOp @model.layers.14 [using_qnn:true, symbol:model.layers.14] { - (%954:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=484)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %348:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=17)], %349:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=45)]) -> (%995:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=514)], %968:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=494)], %970:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=496)]) { - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=484), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=485), )] (%954:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=484)]) -> (%955:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=485)]) - graph.CallGraphOp @model.layers.14.self_attn (%955:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=485)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %348:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=17)], %349:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=45)]) -> (%987:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=506)], %968:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=494)], %970:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=496)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=506), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=484), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=506), )] (%987:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=506)], %954:tensor<[1, 32, 
2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=484)]) -> (%988:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=506)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=506), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=507), )] (%988:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=506)]) -> (%989:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=507)]) - graph.CallGraphOp @model.layers.14.mlp (%989:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=507)]) -> (%994:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=514)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=514), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=506), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=514), )] (%994:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=514)], %988:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=506)]) -> (%995:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=514)]) - cf.ReturnOp (%995:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=514)], %968:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=494)], %970:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=496)]) -> () - } - } - graph.SubGraphOp @model.layers.14.self_attn [using_qnn:true, symbol:model.layers.14.self_attn] { - (%955:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=485)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %348:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=17)], %349:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=45)]) -> 
(%987:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=506)], %968:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=494)], %970:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=496)]) { - linalg.CPU.LinearOp (%955:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=485)]) -> (%956:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=490)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=485), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=487), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=486))] (%955:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=485)]) -> (%957:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=487)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=485), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=489), 
weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=488))] (%955:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=485)]) -> (%958:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=489)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=490), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=490), )] (%956:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=490)]) -> (%956:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=490)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=490), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=490), )] (%956:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=490)]) -> (%959:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=490)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=487), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=487), )] (%957:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=487)]) -> (%957:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=487)]) - 
linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=487), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=487), )] (%957:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=487)]) -> (%960:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=487)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=489), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=489), )] (%958:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=489)]) -> (%958:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=489)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=489), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=489), )] (%958:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=489)]) -> (%961:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=489)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=490), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=491), )] (%959:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=490)]) -> (%962:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=491)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=487), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=492), )] (%960:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=487)]) -> (%963:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=492)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=491), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=491), )] (%962:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=491)], %379:tensor<[1, 32, 128], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%964:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=491)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=492), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=492), )] (%963:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=492)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%965:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=492)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=492), outputs_0:QuantSpec(Raw(type: Float16), uuid=493), )] 
(%965:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=492)]) -> (%966:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=493)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=493), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=494), )] (%966:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=493)]) -> (%967:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=494)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=494), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=494), )] (%967:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=494)]) -> (%968:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=494)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=489), outputs_0:QuantSpec(Raw(type: Float16), uuid=495), )] (%961:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=489)]) -> (%969:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=495)]) - linalg.CPU.CastTypeOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=495), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=496), )] (%969:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=495)]) -> (%970:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=496)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=17), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=494), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=17), )] (%348:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=17)], %968:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=494)]) -> (%971:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=17)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=45), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=496), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=45), )] (%349:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=45)], 
%970:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=496)]) -> (%972:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=45)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=17), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=17), )] (%971:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=17)]) -> (%973:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=17)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=45), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=45), )] (%972:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=45)]) -> (%974:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=45)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=491), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=17), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 
32767, quant_to_type: Int16, scale_type: Float32), uuid=497), )] (%964:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=491)], %973:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=17)]) -> (%975:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=497)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=497), inputs_1:QuantSpec(Raw(type: Float32), uuid=498), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=497), )] (%975:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=497)], %976:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=498), constant:[0.088388346]]) -> (%977:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=497)]) - linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=497), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=499), )] (%977:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=497)]) -> (%978:tensor<[1, 16, 32, 1], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=499)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=499), inputs_1:QuantSpec(Raw(type: Int16), uuid=500), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=499), )] (%978:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=499)], %979:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=500), constant:[-20]]) -> (%980:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=499)]) - linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=501), outputs_0:QuantSpec(Raw(type: UInt8), uuid=502), )] (%319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %981:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=501), constant:[-0.46289062]]) -> (%982:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=502)]) - linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=502), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=497), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=499), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=499), )] (%982:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: 
UInt8), uuid=502)], %977:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=497)], %980:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=499)]) -> (%983:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=499)]) - linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=499), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=503), )] (%983:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=499)]) -> (%984:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=503)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=503), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=45), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=504), )] (%984:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=503)], %974:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: 
Float32), uuid=45)]) -> (%985:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=504)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=504), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=504), )] (%985:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=504)]) -> (%986:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=504)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=504), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=504), )] (%986:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=504)]) -> (%986:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=504)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=504), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=506), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: 
Float32), uuid=505))] (%986:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=504)]) -> (%987:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=506)]) - cf.ReturnOp (%987:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=506)], %968:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=494)], %970:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=496)]) -> () - } - } - graph.SubGraphOp @model.layers.14.mlp [using_qnn:true, symbol:model.layers.14.mlp] { - (%989:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=507)]) -> (%994:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=514)]) { - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=507), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=509), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=508))] (%989:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, 
scale_type: Float32), uuid=507)]) -> (%990:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=509)]) - linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=509), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=510), )] (%990:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=509)]) -> (%991:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=510)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=507), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=512), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=511))] (%989:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=507)]) -> (%992:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=512)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=510), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=512), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=510), )] (%991:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=510)], %992:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=512)]) -> (%993:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=510)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=510), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=514), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=513))] (%993:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=510)]) -> (%994:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=514)]) - cf.ReturnOp (%994:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=514)]) -> () - } - } - graph.SubGraphOp @model.layers.15 [using_qnn:true, symbol:model.layers.15] { - (%995:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=514)], %379:tensor<[1, 32, 128], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %350:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=18)], %351:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=46)]) -> (%1036:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=544)], %1009:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=524)], %1011:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=526)]) { - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=514), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=515), )] (%995:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=514)]) -> (%996:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=515)]) - graph.CallGraphOp 
@model.layers.15.self_attn (%996:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=515)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %350:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=18)], %351:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=46)]) -> (%1028:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=536)], %1009:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=524)], %1011:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=526)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=536), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=514), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=536), )] (%1028:tensor<[1, 32, 2048], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=536)], %995:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=514)]) -> (%1029:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=536)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=536), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=537), )] (%1029:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=536)]) -> (%1030:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=537)]) - graph.CallGraphOp @model.layers.15.mlp (%1030:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=537)]) -> (%1035:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=544)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=544), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=536), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=544), )] 
(%1035:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=544)], %1029:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=536)]) -> (%1036:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=544)]) - cf.ReturnOp (%1036:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=544)], %1009:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=524)], %1011:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=526)]) -> () - } - } - graph.SubGraphOp @model.layers.15.self_attn [using_qnn:true, symbol:model.layers.15.self_attn] { - (%996:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=515)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %350:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=18)], 
%351:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=46)]) -> (%1028:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=536)], %1009:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=524)], %1011:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=526)]) { - linalg.CPU.LinearOp (%996:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=515)]) -> (%997:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=520)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=515), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=517), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=516))] (%996:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=515)]) -> (%998:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=517)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, 
scale_type: Float32), uuid=515), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=519), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=518))] (%996:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=515)]) -> (%999:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=519)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=520), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=520), )] (%997:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=520)]) -> (%997:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=520)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=520), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=520), )] (%997:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=520)]) -> (%1000:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=520)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=517), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=517), )] (%998:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=517)]) -> (%998:tensor<[1, 32, 8, 
128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=517)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=517), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=517), )] (%998:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=517)]) -> (%1001:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=517)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=519), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=519), )] (%999:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=519)]) -> (%999:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=519)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=519), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=519), )] (%999:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=519)]) -> (%1002:tensor<[1, 
8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=519)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=520), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=521), )] (%1000:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=520)]) -> (%1003:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=521)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=517), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=522), )] (%1001:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=517)]) -> (%1004:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=522)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=521), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=521), )] (%1003:tensor<[1, 16, 32, 128], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=521)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%1005:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=521)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=522), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=522), )] (%1004:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=522)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%1006:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=522)]) - linalg.CPU.CastTypeOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=522), outputs_0:QuantSpec(Raw(type: Float16), uuid=523), )] (%1006:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=522)]) -> (%1007:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=523)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=523), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=524), )] (%1007:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=523)]) -> (%1008:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=524)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=524), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=524), )] (%1008:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=524)]) -> (%1009:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=524)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=519), outputs_0:QuantSpec(Raw(type: Float16), uuid=525), )] (%1002:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 
32767, quant_to_type: Int16, scale_type: Float32), uuid=519)]) -> (%1010:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=525)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=525), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=526), )] (%1010:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=525)]) -> (%1011:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=526)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=18), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=524), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=18), )] (%350:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=18)], %1009:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=524)]) -> (%1012:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=18)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=46), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=526), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: 
Float32), uuid=46), )] (%351:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=46)], %1011:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=526)]) -> (%1013:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=46)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=18), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=18), )] (%1012:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=18)]) -> (%1014:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=18)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=46), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=46), )] (%1013:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=46)]) -> (%1015:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=46)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, 
scale_type: Float32), uuid=521), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=18), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=527), )] (%1005:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=521)], %1014:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=18)]) -> (%1016:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=527)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=527), inputs_1:QuantSpec(Raw(type: Float32), uuid=528), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=527), )] (%1016:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=527)], %1017:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=528), constant:[0.088388346]]) -> (%1018:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=527)]) - linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=527), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=529), )] (%1018:tensor<[1, 16, 32, 1024], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=527)]) -> (%1019:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=529)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=529), inputs_1:QuantSpec(Raw(type: Int16), uuid=530), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=529), )] (%1019:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=529)], %1020:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=530), constant:[-20]]) -> (%1021:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=529)]) - linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=531), outputs_0:QuantSpec(Raw(type: UInt8), uuid=532), )] (%319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %1022:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=531), constant:[0.953125]]) -> (%1023:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=532)]) - linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=532), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=527), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=529), 
outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=529), )] (%1023:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=532)], %1018:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=527)], %1021:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=529)]) -> (%1024:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=529)]) - linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=529), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=533), )] (%1024:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=529)]) -> (%1025:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=533)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=533), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=46), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=534), )] (%1025:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=533)], %1015:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=46)]) -> (%1026:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=534)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=534), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=534), )] (%1026:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=534)]) -> (%1027:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=534)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=534), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=534), )] (%1027:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=534)]) -> (%1027:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=534)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=534), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 
32767, quant_to_type: Int16, scale_type: Float32), uuid=536), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=535))] (%1027:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=534)]) -> (%1028:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=536)]) - cf.ReturnOp (%1028:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=536)], %1009:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=524)], %1011:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=526)]) -> () - } - } - graph.SubGraphOp @model.layers.15.mlp [using_qnn:true, symbol:model.layers.15.mlp] { - (%1030:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=537)]) -> (%1035:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=544)]) { - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=537), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=539), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, 
scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=538))] (%1030:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=537)]) -> (%1031:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=539)]) - linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=539), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=540), )] (%1031:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=539)]) -> (%1032:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=540)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=537), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=542), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=541))] (%1030:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=537)]) -> (%1033:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=542)]) - linalg.CPU.MulOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=540), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=542), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=540), )] (%1032:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=540)], %1033:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=542)]) -> (%1034:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=540)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=540), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=544), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=543))] (%1034:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=540)]) -> (%1035:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=544)]) - cf.ReturnOp (%1035:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=544)]) -> () - } - } - 
graph.SubGraphOp @model.layers.16 [using_qnn:true, symbol:model.layers.16] { - (%1036:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=544)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %352:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=19)], %353:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=47)]) -> (%1077:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=574)], %1050:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=554)], %1052:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=556)]) { - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=544), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=545), )] (%1036:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=544)]) -> (%1037:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=545)]) - graph.CallGraphOp @model.layers.16.self_attn (%1037:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=545)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %352:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=19)], %353:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=47)]) -> (%1069:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=566)], %1050:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=554)], %1052:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=556)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=566), 
inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=544), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=566), )] (%1069:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=566)], %1036:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=544)]) -> (%1070:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=566)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=566), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=567), )] (%1070:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=566)]) -> (%1071:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=567)]) - graph.CallGraphOp @model.layers.16.mlp (%1071:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=567)]) -> (%1076:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=574)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, 
scale_type: Float32), uuid=574), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=566), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=574), )] (%1076:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=574)], %1070:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=566)]) -> (%1077:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=574)]) - cf.ReturnOp (%1077:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=574)], %1050:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=554)], %1052:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=556)]) -> () - } - } - graph.SubGraphOp @model.layers.16.self_attn [using_qnn:true, symbol:model.layers.16.self_attn] { - (%1037:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=545)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %352:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=19)], %353:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=47)]) -> (%1069:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=566)], %1050:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=554)], %1052:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=556)]) { - linalg.CPU.LinearOp (%1037:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=545)]) -> (%1038:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=550)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=545), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=547), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=546))] (%1037:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=545)]) -> (%1039:tensor<[1, 32, 1024], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=547)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=545), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=549), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=548))] (%1037:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=545)]) -> (%1040:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=549)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=550), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=550), )] (%1038:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=550)]) -> (%1038:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=550)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=550), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=550), )] (%1038:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=550)]) -> (%1041:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=550)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=547), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=547), )] (%1039:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=547)]) -> (%1039:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=547)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=547), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=547), )] (%1039:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=547)]) -> (%1042:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=547)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=549), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=549), )] (%1040:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=549)]) -> (%1040:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=549)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=549), outputs_0:QuantSpec(SymPerTensor(quant_min: 
-32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=549), )] (%1040:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=549)]) -> (%1043:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=549)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=550), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=551), )] (%1041:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=550)]) -> (%1044:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=551)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=547), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=552), )] (%1042:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=547)]) -> (%1045:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=552)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=551), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: 
-32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=551), )] (%1044:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=551)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%1046:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=551)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=552), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=552), )] (%1045:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=552)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=64)]) -> (%1047:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=552)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=552), outputs_0:QuantSpec(Raw(type: Float16), uuid=553), )] (%1047:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=552)]) -> (%1048:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=553)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=553), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=554), )] (%1048:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=553)]) -> (%1049:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=554)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=554), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=554), )] (%1049:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=554)]) -> (%1050:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=554)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=549), outputs_0:QuantSpec(Raw(type: Float16), uuid=555), )] (%1043:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=549)]) -> (%1051:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=555)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=555), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=556), )] (%1051:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=555)]) -> (%1052:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=556)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=19), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=554), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=19), )] (%352:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=19)], %1050:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=554)]) -> (%1053:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=19)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), 
uuid=47), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=556), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=47), )] (%353:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=47)], %1052:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=556)]) -> (%1054:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=47)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=19), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=19), )] (%1053:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=19)]) -> (%1055:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=19)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=47), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=47), )] (%1054:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=47)]) -> (%1056:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 
-128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=47)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=551), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=19), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=557), )] (%1046:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=551)], %1055:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=19)]) -> (%1057:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=557)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=557), inputs_1:QuantSpec(Raw(type: Float32), uuid=558), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=557), )] (%1057:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=557)], %1058:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=558), constant:[0.088388346]]) -> (%1059:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=557)]) - linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=557), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=559), )] (%1059:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=557)]) -> (%1060:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=559)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=559), inputs_1:QuantSpec(Raw(type: Int16), uuid=560), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=559), )] (%1060:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=559)], %1061:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=560), constant:[-20]]) -> (%1062:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=559)]) - linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=561), outputs_0:QuantSpec(Raw(type: UInt8), uuid=562), )] (%319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %1063:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=561), constant:[0.118652344]]) -> (%1064:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=562)]) - linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=562), 
inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=557), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=559), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=559), )] (%1064:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=562)], %1059:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=557)], %1062:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=559)]) -> (%1065:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=559)]) - linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=559), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=563), )] (%1065:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=559)]) -> (%1066:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=563)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=563), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=47), 
outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=564), )] (%1066:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=563)], %1056:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=47)]) -> (%1067:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=564)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=564), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=564), )] (%1067:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=564)]) -> (%1068:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=564)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=564), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=564), )] (%1068:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=564)]) -> (%1068:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=564)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=564), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=566), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=565))] (%1068:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=564)]) -> (%1069:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=566)]) - cf.ReturnOp (%1069:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=566)], %1050:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=554)], %1052:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=556)]) -> () - } - } - graph.SubGraphOp @model.layers.16.mlp [using_qnn:true, symbol:model.layers.16.mlp] { - (%1071:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=567)]) -> (%1076:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=574)]) { - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, 
scale_type: Float32), uuid=567), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=569), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=568))] (%1071:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=567)]) -> (%1072:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=569)]) - linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=569), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=570), )] (%1072:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=569)]) -> (%1073:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=570)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=567), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=572), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=571))] (%1071:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=567)]) -> (%1074:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=572)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=570), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=572), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=570), )] (%1073:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=570)], %1074:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=572)]) -> (%1075:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=570)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=570), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=574), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=573))] (%1075:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=570)]) -> (%1076:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=574)]) - 
cf.ReturnOp (%1076:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=574)]) -> () - } - } - graph.SubGraphOp @model.layers.17 [using_qnn:true, symbol:model.layers.17] { - (%1077:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=574)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %354:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=20)], %355:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=48)]) -> (%1118:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=604)], %1091:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=584)], %1093:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=586)]) { - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=574), 
outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=575), )] (%1077:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=574)]) -> (%1078:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=575)]) - graph.CallGraphOp @model.layers.17.self_attn (%1078:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=575)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %354:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=20)], %355:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=48)]) -> (%1110:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=596)], %1091:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=584)], %1093:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: 
Int8, scale_type: Float32), uuid=586)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=596), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=574), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=596), )] (%1110:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=596)], %1077:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=574)]) -> (%1111:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=596)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=596), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=597), )] (%1111:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=596)]) -> (%1112:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=597)]) - graph.CallGraphOp @model.layers.17.mlp (%1112:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=597)]) -> (%1117:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=604)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=604), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=596), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=604), )] (%1117:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=604)], %1111:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=596)]) -> (%1118:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=604)]) - cf.ReturnOp (%1118:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=604)], %1091:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=584)], %1093:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=586)]) -> () - } - } - graph.SubGraphOp @model.layers.17.self_attn [using_qnn:true, symbol:model.layers.17.self_attn] { - (%1078:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=575)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %354:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=20)], %355:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=48)]) -> (%1110:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=596)], %1091:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=584)], %1093:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=586)]) { - linalg.CPU.LinearOp (%1078:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=575)]) -> (%1079:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=580)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=575), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=577), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=576))] 
(%1078:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=575)]) -> (%1080:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=577)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=575), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=579), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=578))] (%1078:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=575)]) -> (%1081:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=579)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=580), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=580), )] (%1079:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=580)]) -> (%1079:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=580)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=580), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=580), )] (%1079:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=580)]) -> (%1082:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=580)]) 
- linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=577), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=577), )] (%1080:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=577)]) -> (%1080:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=577)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=577), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=577), )] (%1080:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=577)]) -> (%1083:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=577)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=579), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=579), )] (%1081:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=579)]) -> (%1081:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=579)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=579), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=579), )] (%1081:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=579)]) -> (%1084:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=579)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=580), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=581), )] (%1082:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=580)]) -> (%1085:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=581)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=577), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=582), )] (%1083:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=577)]) -> (%1086:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=582)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=581), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=581), )] (%1085:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=581)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%1087:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=581)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=582), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=582), )] (%1086:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=582)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%1088:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=582)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=582), outputs_0:QuantSpec(Raw(type: Float16), uuid=583), )] (%1088:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=582)]) -> (%1089:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=583)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=583), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=584), )] (%1089:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=583)]) -> (%1090:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=584)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=584), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=584), )] (%1090:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=584)]) -> (%1091:tensor<[1, 8, 128, 32], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=584)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=579), outputs_0:QuantSpec(Raw(type: Float16), uuid=585), )] (%1084:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=579)]) -> (%1092:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=585)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=585), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=586), )] (%1092:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=585)]) -> (%1093:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=586)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=20), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=584), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=20), )] (%354:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=20)], %1091:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=584)]) -> (%1094:tensor<[1, 8, 128, 1024], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=20)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=48), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=586), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=48), )] (%355:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=48)], %1093:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=586)]) -> (%1095:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=48)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=20), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=20), )] (%1094:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=20)]) -> (%1096:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=20)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=48), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), 
uuid=48), )] (%1095:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=48)]) -> (%1097:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=48)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=581), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=20), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=587), )] (%1087:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=581)], %1096:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=20)]) -> (%1098:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=587)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=587), inputs_1:QuantSpec(Raw(type: Float32), uuid=588), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=587), )] (%1098:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=587)], %1099:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=588), constant:[0.088388346]]) -> 
(%1100:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=587)]) - linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=587), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=589), )] (%1100:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=587)]) -> (%1101:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=589)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=589), inputs_1:QuantSpec(Raw(type: Int16), uuid=590), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=589), )] (%1101:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=589)], %1102:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=590), constant:[-20]]) -> (%1103:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=589)]) - linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=591), outputs_0:QuantSpec(Raw(type: UInt8), uuid=592), )] (%319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %1104:tensor<[1], UInt16, 
CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=591), constant:[-0.99609375]]) -> (%1105:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=592)]) - linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=592), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=587), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=589), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=589), )] (%1105:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=592)], %1100:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=587)], %1103:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=589)]) -> (%1106:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=589)]) - linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=589), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=593), )] (%1106:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=589)]) -> (%1107:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=593)]) - linalg.CPU.MatMulOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=593), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=48), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=594), )] (%1107:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=593)], %1097:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=48)]) -> (%1108:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=594)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=594), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=594), )] (%1108:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=594)]) -> (%1109:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=594)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=594), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=594), )] (%1109:tensor<[1, 32, 16, 128], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=594)]) -> (%1109:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=594)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=594), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=596), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=595))] (%1109:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=594)]) -> (%1110:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=596)]) - cf.ReturnOp (%1110:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=596)], %1091:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=584)], %1093:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=586)]) -> () - } - } - graph.SubGraphOp @model.layers.17.mlp [using_qnn:true, symbol:model.layers.17.mlp] { - (%1112:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=597)]) -> (%1117:tensor<[1, 32, 
2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=604)]) { - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=597), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=599), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=598))] (%1112:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=597)]) -> (%1113:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=599)]) - linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=599), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=600), )] (%1113:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=599)]) -> (%1114:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=600)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=597), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=602), weight_weight:QuantSpec(LPBQ(quant_min: 
-8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=601))] (%1112:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=597)]) -> (%1115:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=602)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=600), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=602), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=600), )] (%1114:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=600)], %1115:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=602)]) -> (%1116:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=600)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=600), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=604), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=603))] (%1116:tensor<[1, 32, 6144], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=600)]) -> (%1117:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=604)]) - cf.ReturnOp (%1117:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=604)]) -> () - } - } - graph.SubGraphOp @model.layers.18 [using_qnn:true, symbol:model.layers.18] { - (%1118:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=604)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %356:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=21)], %357:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=49)]) -> (%1159:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=634)], %1132:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=614)], %1134:tensor<[1, 8, 32, 128], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=616)]) { - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=604), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=605), )] (%1118:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=604)]) -> (%1119:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=605)]) - graph.CallGraphOp @model.layers.18.self_attn (%1119:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=605)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %356:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=21)], %357:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=49)]) -> (%1151:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=626)], 
%1132:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=614)], %1134:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=616)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=626), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=604), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=626), )] (%1151:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=626)], %1118:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=604)]) -> (%1152:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=626)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=626), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=627), )] (%1152:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=626)]) -> (%1153:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=627)]) - graph.CallGraphOp 
@model.layers.18.mlp (%1153:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=627)]) -> (%1158:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=634)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=634), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=626), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=634), )] (%1158:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=634)], %1152:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=626)]) -> (%1159:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=634)]) - cf.ReturnOp (%1159:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=634)], %1132:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=614)], %1134:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=616)]) -> () - } - } - graph.SubGraphOp @model.layers.18.self_attn [using_qnn:true, symbol:model.layers.18.self_attn] { 
- (%1119:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=605)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %356:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=21)], %357:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=49)]) -> (%1151:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=626)], %1132:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=614)], %1134:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=616)]) { - linalg.CPU.LinearOp (%1119:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=605)]) -> (%1120:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=610)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=605), 
outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=607), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=606))] (%1119:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=605)]) -> (%1121:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=607)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=605), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=609), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=608))] (%1119:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=605)]) -> (%1122:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=609)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=610), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=610), )] (%1120:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=610)]) -> (%1120:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=610)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), 
uuid=610), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=610), )] (%1120:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=610)]) -> (%1123:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=610)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=607), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=607), )] (%1121:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=607)]) -> (%1121:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=607)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=607), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=607), )] (%1121:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=607)]) -> (%1124:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=607)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=609), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=609), )] (%1122:tensor<[1, 32, 1024], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=609)]) -> (%1122:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=609)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=609), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=609), )] (%1122:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=609)]) -> (%1125:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=609)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=610), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=611), )] (%1123:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=610)]) -> (%1126:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=611)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=607), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=612), )] (%1124:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: 
Float32), uuid=607)]) -> (%1127:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=612)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=611), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=611), )] (%1126:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=611)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%1128:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=611)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=612), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: 
Float32), uuid=612), )] (%1127:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=612)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%1129:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=612)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=612), outputs_0:QuantSpec(Raw(type: Float16), uuid=613), )] (%1129:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=612)]) -> (%1130:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=613)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=613), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=614), )] (%1130:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=613)]) -> (%1131:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=614)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=614), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, 
quant_to_type: Int8, scale_type: Float32), uuid=614), )] (%1131:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=614)]) -> (%1132:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=614)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=609), outputs_0:QuantSpec(Raw(type: Float16), uuid=615), )] (%1125:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=609)]) -> (%1133:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=615)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=615), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=616), )] (%1133:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=615)]) -> (%1134:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=616)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=21), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=614), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=21), )] (%356:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: 
Float32), uuid=21)], %1132:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=614)]) -> (%1135:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=21)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=49), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=616), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=49), )] (%357:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=49)], %1134:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=616)]) -> (%1136:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=49)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=21), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=21), )] (%1135:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=21)]) -> (%1137:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=21)]) - linalg.CPU.RepeatOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=49), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=49), )] (%1136:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=49)]) -> (%1138:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=49)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=611), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=21), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=617), )] (%1128:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=611)], %1137:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=21)]) -> (%1139:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=617)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=617), inputs_1:QuantSpec(Raw(type: Float32), uuid=618), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=617), )] (%1139:tensor<[1, 16, 32, 1024], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=617)], %1140:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=618), constant:[0.088388346]]) -> (%1141:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=617)]) - linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=617), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=619), )] (%1141:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=617)]) -> (%1142:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=619)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=619), inputs_1:QuantSpec(Raw(type: Int16), uuid=620), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=619), )] (%1142:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=619)], %1143:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=620), constant:[-20]]) -> (%1144:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=619)]) - linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: 
UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=621), outputs_0:QuantSpec(Raw(type: UInt8), uuid=622), )] (%319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %1145:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=621), constant:[0.24023438]]) -> (%1146:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=622)]) - linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=622), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=617), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=619), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=619), )] (%1146:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=622)], %1141:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=617)], %1144:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=619)]) -> (%1147:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=619)]) - linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=619), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=623), )] (%1147:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, 
scale_type: Float32), uuid=619)]) -> (%1148:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=623)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=623), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=49), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=624), )] (%1148:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=623)], %1138:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=49)]) -> (%1149:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=624)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=624), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=624), )] (%1149:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=624)]) -> (%1150:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=624)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: 
Int16, scale_type: Float32), uuid=624), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=624), )] (%1150:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=624)]) -> (%1150:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=624)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=624), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=626), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=625))] (%1150:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=624)]) -> (%1151:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=626)]) - cf.ReturnOp (%1151:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=626)], %1132:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=614)], %1134:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=616)]) -> () - } - } - graph.SubGraphOp @model.layers.18.mlp [using_qnn:true, 
symbol:model.layers.18.mlp] { - (%1153:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=627)]) -> (%1158:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=634)]) { - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=627), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=629), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=628))] (%1153:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=627)]) -> (%1154:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=629)]) - linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=629), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=630), )] (%1154:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=629)]) -> (%1155:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=630)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=627), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=632), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=631))] (%1153:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=627)]) -> (%1156:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=632)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=630), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=632), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=630), )] (%1155:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=630)], %1156:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=632)]) -> (%1157:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=630)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=630), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=634), 
weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=633))] (%1157:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=630)]) -> (%1158:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=634)]) - cf.ReturnOp (%1158:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=634)]) -> () - } - } - graph.SubGraphOp @model.layers.19 [using_qnn:true, symbol:model.layers.19] { - (%1159:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=634)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %358:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=22)], %359:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=50)]) -> (%1200:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=664)], %1173:tensor<[1, 8, 128, 
32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=644)], %1175:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=646)]) { - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=634), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=635), )] (%1159:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=634)]) -> (%1160:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=635)]) - graph.CallGraphOp @model.layers.19.self_attn (%1160:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=635)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %358:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=22)], %359:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), 
uuid=50)]) -> (%1192:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=656)], %1173:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=644)], %1175:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=646)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=656), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=634), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=656), )] (%1192:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=656)], %1159:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=634)]) -> (%1193:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=656)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=656), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=657), )] (%1193:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=656)]) -> 
(%1194:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=657)]) - graph.CallGraphOp @model.layers.19.mlp (%1194:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=657)]) -> (%1199:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=664)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=664), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=656), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=664), )] (%1199:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=664)], %1193:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=656)]) -> (%1200:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=664)]) - cf.ReturnOp (%1200:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=664)], %1173:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=644)], %1175:tensor<[1, 8, 32, 128], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=646)]) -> () - } - } - graph.SubGraphOp @model.layers.19.self_attn [using_qnn:true, symbol:model.layers.19.self_attn] { - (%1160:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=635)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %358:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=22)], %359:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=50)]) -> (%1192:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=656)], %1173:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=644)], %1175:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=646)]) { - linalg.CPU.LinearOp (%1160:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=635)]) -> (%1161:tensor<[1, 32, 2048], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=640)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=635), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=637), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=636))] (%1160:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=635)]) -> (%1162:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=637)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=635), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=639), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=638))] (%1160:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=635)]) -> (%1163:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=639)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=640), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=640), )] (%1161:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: 
Int16PerTensor), uuid=640)]) -> (%1161:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=640)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=640), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=640), )] (%1161:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=640)]) -> (%1164:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=640)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=637), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=637), )] (%1162:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=637)]) -> (%1162:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=637)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=637), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=637), )] (%1162:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=637)]) -> (%1165:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=637)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=639), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=639), )] (%1163:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=639)]) -> (%1163:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=639)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=639), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=639), )] (%1163:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=639)]) -> (%1166:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=639)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=640), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=641), )] (%1164:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=640)]) -> (%1167:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=641)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=637), outputs_0:QuantSpec(SymPerTensor(quant_min: 
-32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=642), )] (%1165:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=637)]) -> (%1168:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=642)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=641), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=641), )] (%1167:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=641)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%1169:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=641)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=642), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: 
Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=642), )] (%1168:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=642)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%1170:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=642)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=642), outputs_0:QuantSpec(Raw(type: Float16), uuid=643), )] (%1170:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=642)]) -> (%1171:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=643)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=643), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=644), )] (%1171:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=643)]) -> (%1172:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), 
uuid=644)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=644), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=644), )] (%1172:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=644)]) -> (%1173:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=644)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=639), outputs_0:QuantSpec(Raw(type: Float16), uuid=645), )] (%1166:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=639)]) -> (%1174:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=645)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=645), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=646), )] (%1174:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=645)]) -> (%1175:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=646)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=22), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=644), 
outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=22), )] (%358:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=22)], %1173:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=644)]) -> (%1176:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=22)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=50), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=646), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=50), )] (%359:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=50)], %1175:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=646)]) -> (%1177:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=50)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=22), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=22), )] (%1176:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 
127, quant_to_type: Int8, scale_type: Float32), uuid=22)]) -> (%1178:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=22)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=50), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=50), )] (%1177:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=50)]) -> (%1179:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=50)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=641), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=22), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=647), )] (%1169:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=641)], %1178:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=22)]) -> (%1180:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=647)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, 
scale_type: Float32), uuid=647), inputs_1:QuantSpec(Raw(type: Float32), uuid=648), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=647), )] (%1180:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=647)], %1181:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=648), constant:[0.088388346]]) -> (%1182:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=647)]) - linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=647), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=649), )] (%1182:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=647)]) -> (%1183:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=649)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=649), inputs_1:QuantSpec(Raw(type: Int16), uuid=650), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=649), )] (%1183:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=649)], %1184:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=650), constant:[-20]]) -> 
(%1185:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=649)]) - linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=651), outputs_0:QuantSpec(Raw(type: UInt8), uuid=652), )] (%319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %1186:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=651), constant:[0.55078125]]) -> (%1187:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=652)]) - linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=652), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=647), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=649), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=649), )] (%1187:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=652)], %1182:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=647)], %1185:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=649)]) -> (%1188:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=649)]) - linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=649), 
outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=653), )] (%1188:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=649)]) -> (%1189:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=653)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=653), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=50), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=654), )] (%1189:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=653)], %1179:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=50)]) -> (%1190:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=654)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=654), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=654), )] (%1190:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=654)]) -> (%1191:tensor<[1, 32, 16, 128], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=654)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=654), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=654), )] (%1191:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=654)]) -> (%1191:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=654)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=654), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=656), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=655))] (%1191:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=654)]) -> (%1192:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=656)]) - cf.ReturnOp (%1192:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=656)], %1173:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: 
Float32), uuid=644)], %1175:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=646)]) -> () - } - } - graph.SubGraphOp @model.layers.19.mlp [using_qnn:true, symbol:model.layers.19.mlp] { - (%1194:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=657)]) -> (%1199:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=664)]) { - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=657), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=659), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=658))] (%1194:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=657)]) -> (%1195:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=659)]) - linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=659), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=660), )] (%1195:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=659)]) -> (%1196:tensor<[1, 
32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=660)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=657), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=662), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=661))] (%1194:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=657)]) -> (%1197:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=662)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=660), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=662), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=660), )] (%1196:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=660)], %1197:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=662)]) -> (%1198:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=660)]) - linalg.CPU.LinearOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=660), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=664), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=663))] (%1198:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=660)]) -> (%1199:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=664)]) - cf.ReturnOp (%1199:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=664)]) -> () - } - } - graph.SubGraphOp @model.layers.20 [using_qnn:true, symbol:model.layers.20] { - (%1200:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=664)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %360:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=23)], %361:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, 
quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=51)]) -> (%1241:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=694)], %1214:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=674)], %1216:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=676)]) { - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=664), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=665), )] (%1200:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=664)]) -> (%1201:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=665)]) - graph.CallGraphOp @model.layers.20.self_attn (%1201:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=665)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %360:tensor<[1, 8, 128, 992], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=23)], %361:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=51)]) -> (%1233:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=686)], %1214:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=674)], %1216:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=676)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=686), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=664), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=686), )] (%1233:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=686)], %1200:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=664)]) -> (%1234:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=686)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=686), 
outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=687), )] (%1234:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=686)]) -> (%1235:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=687)]) - graph.CallGraphOp @model.layers.20.mlp (%1235:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=687)]) -> (%1240:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=694)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=694), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=686), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=694), )] (%1240:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=694)], %1234:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=686)]) -> (%1241:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=694)]) - cf.ReturnOp (%1241:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=694)], %1214:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=674)], %1216:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=676)]) -> () - } - } - graph.SubGraphOp @model.layers.20.self_attn [using_qnn:true, symbol:model.layers.20.self_attn] { - (%1201:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=665)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %360:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=23)], %361:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=51)]) -> (%1233:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=686)], %1214:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=674)], %1216:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: 
Float32), uuid=676)]) { - linalg.CPU.LinearOp (%1201:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=665)]) -> (%1202:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=670)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=665), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=667), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=666))] (%1201:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=665)]) -> (%1203:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=667)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=665), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=669), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=668))] (%1201:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=665)]) -> (%1204:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=669)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=670), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=670), )] (%1202:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=670)]) -> (%1202:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=670)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=670), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=670), )] (%1202:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=670)]) -> (%1205:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=670)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=667), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=667), )] (%1203:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=667)]) -> (%1203:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=667)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=667), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=667), )] (%1203:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=667)]) -> (%1206:tensor<[1, 8, 32, 
128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=667)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=669), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=669), )] (%1204:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=669)]) -> (%1204:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=669)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=669), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=669), )] (%1204:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=669)]) -> (%1207:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=669)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=670), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=671), )] (%1205:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=670)]) -> (%1208:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, 
scale_type: Float32), uuid=671)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=667), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=672), )] (%1206:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=667)]) -> (%1209:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=672)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=671), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=671), )] (%1208:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=671)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%1210:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=671)]) - linalg.CPU.RoPEOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=672), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=672), )] (%1209:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=672)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%1211:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=672)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=672), outputs_0:QuantSpec(Raw(type: Float16), uuid=673), )] (%1211:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=672)]) -> (%1212:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=673)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=673), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=674), )] 
(%1212:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=673)]) -> (%1213:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=674)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=674), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=674), )] (%1213:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=674)]) -> (%1214:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=674)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=669), outputs_0:QuantSpec(Raw(type: Float16), uuid=675), )] (%1207:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=669)]) -> (%1215:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=675)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=675), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=676), )] (%1215:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=675)]) -> (%1216:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=676)]) - linalg.CPU.ConcatOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=23), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=674), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=23), )] (%360:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=23)], %1214:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=674)]) -> (%1217:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=23)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=51), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=676), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=51), )] (%361:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=51)], %1216:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=676)]) -> (%1218:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=51)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, 
scale_type: Float32), uuid=23), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=23), )] (%1217:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=23)]) -> (%1219:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=23)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=51), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=51), )] (%1218:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=51)]) -> (%1220:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=51)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=671), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=23), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=677), )] (%1210:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=671)], %1219:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=23)]) -> (%1221:tensor<[1, 16, 32, 1024], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=677)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=677), inputs_1:QuantSpec(Raw(type: Float32), uuid=678), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=677), )] (%1221:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=677)], %1222:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=678), constant:[0.088388346]]) -> (%1223:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=677)]) - linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=677), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=679), )] (%1223:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=677)]) -> (%1224:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=679)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=679), inputs_1:QuantSpec(Raw(type: Int16), uuid=680), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=679), )] 
(%1224:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=679)], %1225:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=680), constant:[-20]]) -> (%1226:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=679)]) - linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=681), outputs_0:QuantSpec(Raw(type: UInt8), uuid=682), )] (%319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %1227:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=681), constant:[0.71875]]) -> (%1228:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=682)]) - linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=682), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=677), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=679), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=679), )] (%1228:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=682)], %1223:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=677)], %1226:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=679)]) -> (%1229:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 
-32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=679)]) - linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=679), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=683), )] (%1229:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=679)]) -> (%1230:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=683)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=683), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=51), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=684), )] (%1230:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=683)], %1220:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=51)]) -> (%1231:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=684)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=684), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, 
scale_type: Float32), uuid=684), )] (%1231:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=684)]) -> (%1232:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=684)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=684), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=684), )] (%1232:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=684)]) -> (%1232:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=684)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=684), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=686), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=685))] (%1232:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=684)]) -> (%1233:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=686)]) - cf.ReturnOp (%1233:tensor<[1, 32, 2048], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=686)], %1214:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=674)], %1216:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=676)]) -> () - } - } - graph.SubGraphOp @model.layers.20.mlp [using_qnn:true, symbol:model.layers.20.mlp] { - (%1235:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=687)]) -> (%1240:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=694)]) { - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=687), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=689), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=688))] (%1235:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=687)]) -> (%1236:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=689)]) - linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=689), outputs_0:QuantSpec(SymPerTensor(quant_min: 
-32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=690), )] (%1236:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=689)]) -> (%1237:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=690)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=687), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=692), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=691))] (%1235:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=687)]) -> (%1238:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=692)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=690), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=692), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=690), )] (%1237:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=690)], %1238:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 
32767, quant_to_type: Int16, scale_type: Float32), uuid=692)]) -> (%1239:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=690)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=690), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=694), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=693))] (%1239:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=690)]) -> (%1240:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=694)]) - cf.ReturnOp (%1240:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=694)]) -> () - } - } - graph.SubGraphOp @model.layers.21 [using_qnn:true, symbol:model.layers.21] { - (%1241:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=694)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], 
%362:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=24)], %363:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=52)]) -> (%1282:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=724)], %1255:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=704)], %1257:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=706)]) { - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=694), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=695), )] (%1241:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=694)]) -> (%1242:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=695)]) - graph.CallGraphOp @model.layers.21.self_attn (%1242:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=695)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %362:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=24)], %363:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=52)]) -> (%1274:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=716)], %1255:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=704)], %1257:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=706)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=716), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=694), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=716), )] (%1274:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=716)], %1241:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=694)]) -> (%1275:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=716)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=716), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=717), )] (%1275:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=716)]) -> (%1276:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=717)]) - graph.CallGraphOp @model.layers.21.mlp (%1276:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=717)]) -> (%1281:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=724)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=724), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=716), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=724), )] (%1281:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=724)], %1275:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=716)]) -> (%1282:tensor<[1, 32, 2048], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=724)]) - cf.ReturnOp (%1282:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=724)], %1255:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=704)], %1257:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=706)]) -> () - } - } - graph.SubGraphOp @model.layers.21.self_attn [using_qnn:true, symbol:model.layers.21.self_attn] { - (%1242:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=695)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %362:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=24)], %363:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=52)]) -> (%1274:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=716)], %1255:tensor<[1, 8, 128, 32], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=704)], %1257:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=706)]) { - linalg.CPU.LinearOp (%1242:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=695)]) -> (%1243:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=700)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=695), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=697), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=696))] (%1242:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=695)]) -> (%1244:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=697)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=695), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=699), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=698))] (%1242:tensor<[1, 32, 2048], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=695)]) -> (%1245:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=699)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=700), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=700), )] (%1243:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=700)]) -> (%1243:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=700)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=700), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=700), )] (%1243:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=700)]) -> (%1246:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=700)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=697), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=697), )] (%1244:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=697)]) -> (%1244:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=697)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=697), 
outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=697), )] (%1244:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=697)]) -> (%1247:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=697)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=699), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=699), )] (%1245:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=699)]) -> (%1245:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=699)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=699), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=699), )] (%1245:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=699)]) -> (%1248:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=699)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=700), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=701), )] (%1246:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=700)]) -> (%1249:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=701)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=697), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=702), )] (%1247:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=697)]) -> (%1250:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=702)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=701), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=701), )] (%1249:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=701)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%1251:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=701)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=702), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=702), )] (%1250:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=702)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%1252:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=702)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=702), outputs_0:QuantSpec(Raw(type: Float16), uuid=703), )] (%1252:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=702)]) -> 
(%1253:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=703)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=703), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=704), )] (%1253:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=703)]) -> (%1254:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=704)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=704), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=704), )] (%1254:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=704)]) -> (%1255:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=704)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=699), outputs_0:QuantSpec(Raw(type: Float16), uuid=705), )] (%1248:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=699)]) -> (%1256:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=705)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=705), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=706), )] 
(%1256:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=705)]) -> (%1257:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=706)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=24), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=704), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=24), )] (%362:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=24)], %1255:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=704)]) -> (%1258:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=24)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=52), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=706), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=52), )] (%363:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=52)], %1257:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=706)]) -> (%1259:tensor<[1, 8, 
1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=52)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=24), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=24), )] (%1258:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=24)]) -> (%1260:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=24)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=52), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=52), )] (%1259:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=52)]) -> (%1261:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=52)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=701), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=24), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=707), )] (%1251:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=701)], %1260:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=24)]) -> (%1262:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=707)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=707), inputs_1:QuantSpec(Raw(type: Float32), uuid=708), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=707), )] (%1262:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=707)], %1263:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=708), constant:[0.088388346]]) -> (%1264:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=707)]) - linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=707), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=709), )] (%1264:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=707)]) -> (%1265:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=709)]) - linalg.CPU.AddOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=709), inputs_1:QuantSpec(Raw(type: Int16), uuid=710), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=709), )] (%1265:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=709)], %1266:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=710), constant:[-20]]) -> (%1267:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=709)]) - linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=711), outputs_0:QuantSpec(Raw(type: UInt8), uuid=712), )] (%319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %1268:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=711), constant:[-0.80859375]]) -> (%1269:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=712)]) - linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=712), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=707), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=709), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=709), )] (%1269:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=712)], %1264:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=707)], %1267:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=709)]) -> (%1270:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=709)]) - linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=709), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=713), )] (%1270:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=709)]) -> (%1271:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=713)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=713), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=52), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=714), )] (%1271:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=713)], %1261:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=52)]) -> (%1272:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 
32767, quant_to_type: Int16, scale_type: Float32), uuid=714)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=714), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=714), )] (%1272:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=714)]) -> (%1273:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=714)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=714), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=714), )] (%1273:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=714)]) -> (%1273:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=714)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=714), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=716), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=715))] (%1273:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=714)]) -> (%1274:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=716)]) - cf.ReturnOp (%1274:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=716)], %1255:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=704)], %1257:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=706)]) -> () - } - } - graph.SubGraphOp @model.layers.21.mlp [using_qnn:true, symbol:model.layers.21.mlp] { - (%1276:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=717)]) -> (%1281:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=724)]) { - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=717), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=719), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=718))] (%1276:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=717)]) -> (%1277:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 
-32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=719)]) - linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=719), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=720), )] (%1277:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=719)]) -> (%1278:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=720)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=717), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=722), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=721))] (%1276:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=717)]) -> (%1279:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=722)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=720), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=722), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=720), )] (%1278:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=720)], %1279:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=722)]) -> (%1280:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=720)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=720), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=724), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=723))] (%1280:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=720)]) -> (%1281:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=724)]) - cf.ReturnOp (%1281:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=724)]) -> () - } - } - graph.SubGraphOp @model.layers.22 [using_qnn:true, symbol:model.layers.22] { - (%1282:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=724)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, 
scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %364:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=25)], %365:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=53)]) -> (%1323:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=754)], %1296:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=734)], %1298:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=736)]) { - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=724), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=725), )] (%1282:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=724)]) -> (%1283:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=725)]) - graph.CallGraphOp @model.layers.22.self_attn (%1283:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=725)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %364:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=25)], %365:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=53)]) -> (%1315:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=746)], %1296:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=734)], %1298:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=736)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=746), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=724), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=746), )] (%1315:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=746)], 
%1282:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=724)]) -> (%1316:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=746)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=746), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=747), )] (%1316:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=746)]) -> (%1317:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=747)]) - graph.CallGraphOp @model.layers.22.mlp (%1317:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=747)]) -> (%1322:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=754)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=754), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=746), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=754), )] (%1322:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, 
scale_type: Float32), uuid=754)], %1316:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=746)]) -> (%1323:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=754)]) - cf.ReturnOp (%1323:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=754)], %1296:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=734)], %1298:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=736)]) -> () - } - } - graph.SubGraphOp @model.layers.22.self_attn [using_qnn:true, symbol:model.layers.22.self_attn] { - (%1283:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=725)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %364:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=25)], %365:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: 
Float32), uuid=53)]) -> (%1315:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=746)], %1296:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=734)], %1298:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=736)]) { - linalg.CPU.LinearOp (%1283:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=725)]) -> (%1284:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=730)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=725), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=727), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=726))] (%1283:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=725)]) -> (%1285:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=727)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=725), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=729), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=728))] (%1283:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=725)]) -> (%1286:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=729)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=730), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=730), )] (%1284:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=730)]) -> (%1284:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=730)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=730), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=730), )] (%1284:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=730)]) -> (%1287:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=730)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=727), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=727), )] (%1285:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=727)]) -> (%1285:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: 
Float32), uuid=727)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=727), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=727), )] (%1285:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=727)]) -> (%1288:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=727)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=729), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=729), )] (%1286:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=729)]) -> (%1286:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=729)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=729), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=729), )] (%1286:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=729)]) -> (%1289:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, 
scale_type: Float32), uuid=729)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=730), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=731), )] (%1287:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=730)]) -> (%1290:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=731)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=727), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=732), )] (%1288:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=727)]) -> (%1291:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=732)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=731), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=731), )] (%1290:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=731)], %379:tensor<[1, 32, 128], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%1292:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=731)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=732), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=732), )] (%1291:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=732)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%1293:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=732)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=732), outputs_0:QuantSpec(Raw(type: Float16), 
uuid=733), )] (%1293:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=732)]) -> (%1294:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=733)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=733), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=734), )] (%1294:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=733)]) -> (%1295:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=734)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=734), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=734), )] (%1295:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=734)]) -> (%1296:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=734)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=729), outputs_0:QuantSpec(Raw(type: Float16), uuid=735), )] (%1289:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=729)]) -> (%1297:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=735)]) - linalg.CPU.CastTypeOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=735), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=736), )] (%1297:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=735)]) -> (%1298:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=736)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=25), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=734), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=25), )] (%364:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=25)], %1296:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=734)]) -> (%1299:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=25)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=53), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=736), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=53), )] (%365:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), 
uuid=53)], %1298:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=736)]) -> (%1300:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=53)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=25), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=25), )] (%1299:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=25)]) -> (%1301:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=25)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=53), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=53), )] (%1300:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=53)]) -> (%1302:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=53)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=731), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=25), outputs_0:QuantSpec(SymPerTensor(quant_min: 
-32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=737), )] (%1292:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=731)], %1301:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=25)]) -> (%1303:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=737)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=737), inputs_1:QuantSpec(Raw(type: Float32), uuid=738), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=737), )] (%1303:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=737)], %1304:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=738), constant:[0.088388346]]) -> (%1305:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=737)]) - linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=737), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=739), )] (%1305:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=737)]) -> (%1306:tensor<[1, 16, 32, 1], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=739)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=739), inputs_1:QuantSpec(Raw(type: Int16), uuid=740), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=739), )] (%1306:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=739)], %1307:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=740), constant:[-20]]) -> (%1308:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=739)]) - linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=741), outputs_0:QuantSpec(Raw(type: UInt8), uuid=742), )] (%319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %1309:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=741), constant:[-0.42773438]]) -> (%1310:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=742)]) - linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=742), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=737), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=739), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=739), )] (%1310:tensor<[1, 1, 32, 1024], UInt8, 
CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=742)], %1305:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=737)], %1308:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=739)]) -> (%1311:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=739)]) - linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=739), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=743), )] (%1311:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=739)]) -> (%1312:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=743)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=743), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=53), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=744), )] (%1312:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=743)], %1302:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, 
quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=53)]) -> (%1313:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=744)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=744), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=744), )] (%1313:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=744)]) -> (%1314:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=744)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=744), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=744), )] (%1314:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=744)]) -> (%1314:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=744)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=744), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=746), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, 
scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=745))] (%1314:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=744)]) -> (%1315:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=746)]) - cf.ReturnOp (%1315:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=746)], %1296:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=734)], %1298:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=736)]) -> () - } - } - graph.SubGraphOp @model.layers.22.mlp [using_qnn:true, symbol:model.layers.22.mlp] { - (%1317:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=747)]) -> (%1322:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=754)]) { - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=747), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=749), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=748))] (%1317:tensor<[1, 32, 2048], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=747)]) -> (%1318:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=749)]) - linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=749), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=750), )] (%1318:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=749)]) -> (%1319:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=750)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=747), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=752), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=751))] (%1317:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=747)]) -> (%1320:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=752)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=750), 
inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=752), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=750), )] (%1319:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=750)], %1320:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=752)]) -> (%1321:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=750)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=750), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=754), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=753))] (%1321:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=750)]) -> (%1322:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=754)]) - cf.ReturnOp (%1322:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=754)]) -> () - } - } - graph.SubGraphOp @model.layers.23 [using_qnn:true, symbol:model.layers.23] { - (%1323:tensor<[1, 32, 2048], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=754)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %366:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=26)], %367:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=54)]) -> (%1364:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=784)], %1337:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=764)], %1339:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=766)]) { - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=754), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=755), )] (%1323:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=754)]) -> (%1324:tensor<[1, 32, 2048], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=755)]) - graph.CallGraphOp @model.layers.23.self_attn (%1324:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=755)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %366:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=26)], %367:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=54)]) -> (%1356:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=776)], %1337:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=764)], %1339:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=766)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=776), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=754), 
outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=776), )] (%1356:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=776)], %1323:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=754)]) -> (%1357:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=776)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=776), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=777), )] (%1357:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=776)]) -> (%1358:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=777)]) - graph.CallGraphOp @model.layers.23.mlp (%1358:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=777)]) -> (%1363:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=784)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=784), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, 
scale_type: Float32), uuid=776), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=784), )] (%1363:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=784)], %1357:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=776)]) -> (%1364:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=784)]) - cf.ReturnOp (%1364:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=784)], %1337:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=764)], %1339:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=766)]) -> () - } - } - graph.SubGraphOp @model.layers.23.self_attn [using_qnn:true, symbol:model.layers.23.self_attn] { - (%1324:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=755)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %366:tensor<[1, 8, 
128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=26)], %367:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=54)]) -> (%1356:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=776)], %1337:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=764)], %1339:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=766)]) { - linalg.CPU.LinearOp (%1324:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=755)]) -> (%1325:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=760)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=755), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=757), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=756))] (%1324:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=755)]) -> (%1326:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=757)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=755), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=759), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=758))] (%1324:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=755)]) -> (%1327:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=759)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=760), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=760), )] (%1325:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=760)]) -> (%1325:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=760)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=760), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=760), )] (%1325:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=760)]) -> (%1328:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=760)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=757), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=757), )] (%1326:tensor<[1, 32, 1024], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=757)]) -> (%1326:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=757)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=757), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=757), )] (%1326:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=757)]) -> (%1329:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=757)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=759), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=759), )] (%1327:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=759)]) -> (%1327:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=759)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=759), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=759), )] (%1327:tensor<[1, 32, 8, 128], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=759)]) -> (%1330:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=759)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=760), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=761), )] (%1328:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=760)]) -> (%1331:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=761)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=757), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=762), )] (%1329:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=757)]) -> (%1332:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=762)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=761), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), 
outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=761), )] (%1331:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=761)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%1333:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=761)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=762), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=762), )] (%1332:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=762)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%1334:tensor<[1, 8, 32, 128], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=762)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=762), outputs_0:QuantSpec(Raw(type: Float16), uuid=763), )] (%1334:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=762)]) -> (%1335:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=763)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=763), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=764), )] (%1335:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=763)]) -> (%1336:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=764)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=764), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=764), )] (%1336:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=764)]) -> (%1337:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=764)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=759), 
outputs_0:QuantSpec(Raw(type: Float16), uuid=765), )] (%1330:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=759)]) -> (%1338:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=765)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=765), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=766), )] (%1338:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=765)]) -> (%1339:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=766)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=26), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=764), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=26), )] (%366:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=26)], %1337:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=764)]) -> (%1340:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=26)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=54), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, 
quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=766), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=54), )] (%367:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=54)], %1339:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=766)]) -> (%1341:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=54)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=26), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=26), )] (%1340:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=26)]) -> (%1342:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=26)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=54), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=54), )] (%1341:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=54)]) -> (%1343:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: 
Float32), uuid=54)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=761), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=26), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=767), )] (%1333:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=761)], %1342:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=26)]) -> (%1344:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=767)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=767), inputs_1:QuantSpec(Raw(type: Float32), uuid=768), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=767), )] (%1344:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=767)], %1345:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=768), constant:[0.088388346]]) -> (%1346:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=767)]) - linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=767), 
outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=769), )] (%1346:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=767)]) -> (%1347:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=769)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=769), inputs_1:QuantSpec(Raw(type: Int16), uuid=770), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=769), )] (%1347:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=769)], %1348:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=770), constant:[-20]]) -> (%1349:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=769)]) - linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=771), outputs_0:QuantSpec(Raw(type: UInt8), uuid=772), )] (%319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %1350:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=771), constant:[0.96484375]]) -> (%1351:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=772)]) - linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=772), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, 
scale_type: Float32), uuid=767), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=769), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=769), )] (%1351:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=772)], %1346:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=767)], %1349:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=769)]) -> (%1352:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=769)]) - linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=769), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=773), )] (%1352:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=769)]) -> (%1353:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=773)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=773), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=54), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, 
scale_type: Float32), uuid=774), )] (%1353:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=773)], %1343:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=54)]) -> (%1354:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=774)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=774), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=774), )] (%1354:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=774)]) -> (%1355:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=774)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=774), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=774), )] (%1355:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=774)]) -> (%1355:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=774)]) - linalg.CPU.LinearOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=774), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=776), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=775))] (%1355:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=774)]) -> (%1356:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=776)]) - cf.ReturnOp (%1356:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=776)], %1337:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=764)], %1339:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=766)]) -> () - } - } - graph.SubGraphOp @model.layers.23.mlp [using_qnn:true, symbol:model.layers.23.mlp] { - (%1358:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=777)]) -> (%1363:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=784)]) { - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=777), 
outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=779), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=778))] (%1358:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=777)]) -> (%1359:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=779)]) - linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=779), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=780), )] (%1359:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=779)]) -> (%1360:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=780)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=777), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=782), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=781))] (%1358:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=777)]) -> (%1361:tensor<[1, 32, 
6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=782)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=780), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=782), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=780), )] (%1360:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=780)], %1361:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=782)]) -> (%1362:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=780)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=780), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=784), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=783))] (%1362:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=780)]) -> (%1363:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=784)]) - cf.ReturnOp (%1363:tensor<[1, 32, 2048], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=784)]) -> () - } - } - graph.SubGraphOp @model.layers.24 [using_qnn:true, symbol:model.layers.24] { - (%1364:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=784)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %368:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=27)], %369:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=55)]) -> (%1405:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=814)], %1378:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=794)], %1380:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=796)]) { - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=784), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=785), )] (%1364:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=784)]) -> (%1365:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=785)]) - graph.CallGraphOp @model.layers.24.self_attn (%1365:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=785)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %368:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=27)], %369:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=55)]) -> (%1397:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=806)], %1378:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=794)], %1380:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=796)]) - linalg.CPU.AddOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=806), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=784), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=806), )] (%1397:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=806)], %1364:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=784)]) -> (%1398:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=806)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=806), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=807), )] (%1398:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=806)]) -> (%1399:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=807)]) - graph.CallGraphOp @model.layers.24.mlp (%1399:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=807)]) -> (%1404:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: 
Float32), uuid=814)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=814), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=806), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=814), )] (%1404:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=814)], %1398:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=806)]) -> (%1405:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=814)]) - cf.ReturnOp (%1405:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=814)], %1378:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=794)], %1380:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=796)]) -> () - } - } - graph.SubGraphOp @model.layers.24.self_attn [using_qnn:true, symbol:model.layers.24.self_attn] { - (%1365:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=785)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], 
%380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %368:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=27)], %369:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=55)]) -> (%1397:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=806)], %1378:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=794)], %1380:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=796)]) { - linalg.CPU.LinearOp (%1365:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=785)]) -> (%1366:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=790)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=785), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=787), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=786))] (%1365:tensor<[1, 32, 2048], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=785)]) -> (%1367:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=787)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=785), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=789), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=788))] (%1365:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=785)]) -> (%1368:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=789)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=790), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=790), )] (%1366:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=790)]) -> (%1366:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=790)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=790), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=790), )] (%1366:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=790)]) -> (%1369:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=790)]) - linalg.CPU.ViewOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=787), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=787), )] (%1367:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=787)]) -> (%1367:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=787)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=787), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=787), )] (%1367:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=787)]) -> (%1370:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=787)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=789), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=789), )] (%1368:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=789)]) -> (%1368:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=789)]) - 
linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=789), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=789), )] (%1368:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=789)]) -> (%1371:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=789)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=790), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=791), )] (%1369:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=790)]) -> (%1372:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=791)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=787), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=792), )] (%1370:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=787)]) -> (%1373:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=792)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=791), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=791), )] (%1372:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=791)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%1374:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=791)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=792), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=792), )] (%1373:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=792)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%1375:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=792)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=792), outputs_0:QuantSpec(Raw(type: Float16), uuid=793), )] (%1375:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=792)]) -> (%1376:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=793)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=793), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=794), )] (%1376:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=793)]) -> (%1377:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=794)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=794), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=794), )] (%1377:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=794)]) -> (%1378:tensor<[1, 8, 128, 32], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=794)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=789), outputs_0:QuantSpec(Raw(type: Float16), uuid=795), )] (%1371:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=789)]) -> (%1379:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=795)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=795), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=796), )] (%1379:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=795)]) -> (%1380:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=796)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=27), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=794), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=27), )] (%368:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=27)], %1378:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=794)]) -> (%1381:tensor<[1, 8, 128, 1024], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=27)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=55), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=796), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=55), )] (%369:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=55)], %1380:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=796)]) -> (%1382:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=55)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=27), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=27), )] (%1381:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=27)]) -> (%1383:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=27)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=55), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), 
uuid=55), )] (%1382:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=55)]) -> (%1384:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=55)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=791), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=27), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=797), )] (%1374:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=791)], %1383:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=27)]) -> (%1385:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=797)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=797), inputs_1:QuantSpec(Raw(type: Float32), uuid=798), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=797), )] (%1385:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=797)], %1386:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=798), constant:[0.088388346]]) -> 
(%1387:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=797)]) - linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=797), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=799), )] (%1387:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=797)]) -> (%1388:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=799)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=799), inputs_1:QuantSpec(Raw(type: Int16), uuid=800), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=799), )] (%1388:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=799)], %1389:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=800), constant:[-20]]) -> (%1390:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=799)]) - linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=801), outputs_0:QuantSpec(Raw(type: UInt8), uuid=802), )] (%319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %1391:tensor<[1], UInt16, 
CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=801), constant:[0.07910156]]) -> (%1392:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=802)]) - linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=802), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=797), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=799), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=799), )] (%1392:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=802)], %1387:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=797)], %1390:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=799)]) -> (%1393:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=799)]) - linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=799), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=803), )] (%1393:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=799)]) -> (%1394:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=803)]) - linalg.CPU.MatMulOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=803), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=55), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=804), )] (%1394:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=803)], %1384:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=55)]) -> (%1395:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=804)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=804), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=804), )] (%1395:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=804)]) -> (%1396:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=804)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=804), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=804), )] (%1396:tensor<[1, 32, 16, 128], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=804)]) -> (%1396:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=804)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=804), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=806), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=805))] (%1396:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=804)]) -> (%1397:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=806)]) - cf.ReturnOp (%1397:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=806)], %1378:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=794)], %1380:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=796)]) -> () - } - } - graph.SubGraphOp @model.layers.24.mlp [using_qnn:true, symbol:model.layers.24.mlp] { - (%1399:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=807)]) -> (%1404:tensor<[1, 32, 
2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=814)]) { - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=807), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=809), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=808))] (%1399:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=807)]) -> (%1400:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=809)]) - linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=809), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=810), )] (%1400:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=809)]) -> (%1401:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=810)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=807), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=812), weight_weight:QuantSpec(LPBQ(quant_min: 
-8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=811))] (%1399:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=807)]) -> (%1402:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=812)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=810), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=812), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=810), )] (%1401:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=810)], %1402:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=812)]) -> (%1403:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=810)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=810), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=814), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=813))] (%1403:tensor<[1, 32, 6144], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=810)]) -> (%1404:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=814)]) - cf.ReturnOp (%1404:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=814)]) -> () - } - } - graph.SubGraphOp @model.layers.25 [using_qnn:true, symbol:model.layers.25] { - (%1405:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=814)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %370:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=28)], %371:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=56)]) -> (%1446:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=844)], %1419:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=824)], %1421:tensor<[1, 8, 32, 128], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=826)]) { - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=814), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=815), )] (%1405:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=814)]) -> (%1406:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=815)]) - graph.CallGraphOp @model.layers.25.self_attn (%1406:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=815)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %370:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=28)], %371:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=56)]) -> (%1438:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=836)], 
%1419:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=824)], %1421:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=826)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=836), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=814), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=836), )] (%1438:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=836)], %1405:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=814)]) -> (%1439:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=836)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=836), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=837), )] (%1439:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=836)]) -> (%1440:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=837)]) - graph.CallGraphOp 
@model.layers.25.mlp (%1440:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=837)]) -> (%1445:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=844)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=844), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=836), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=844), )] (%1445:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=844)], %1439:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=836)]) -> (%1446:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=844)]) - cf.ReturnOp (%1446:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=844)], %1419:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=824)], %1421:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=826)]) -> () - } - } - graph.SubGraphOp @model.layers.25.self_attn [using_qnn:true, symbol:model.layers.25.self_attn] { 
- (%1406:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=815)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %370:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=28)], %371:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=56)]) -> (%1438:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=836)], %1419:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=824)], %1421:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=826)]) { - linalg.CPU.LinearOp (%1406:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=815)]) -> (%1407:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=820)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=815), 
outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=817), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=816))] (%1406:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=815)]) -> (%1408:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=817)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=815), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=819), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=818))] (%1406:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=815)]) -> (%1409:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=819)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=820), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=820), )] (%1407:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=820)]) -> (%1407:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=820)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), 
uuid=820), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=820), )] (%1407:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=820)]) -> (%1410:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=820)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=817), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=817), )] (%1408:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=817)]) -> (%1408:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=817)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=817), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=817), )] (%1408:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=817)]) -> (%1411:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=817)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=819), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=819), )] (%1409:tensor<[1, 32, 1024], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=819)]) -> (%1409:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=819)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=819), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=819), )] (%1409:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=819)]) -> (%1412:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=819)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=820), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=821), )] (%1410:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=820)]) -> (%1413:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=821)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=817), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=822), )] (%1411:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: 
Float32), uuid=817)]) -> (%1414:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=822)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=821), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=821), )] (%1413:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=821)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%1415:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=821)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=822), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: 
Float32), uuid=822), )] (%1414:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=822)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%1416:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=822)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=822), outputs_0:QuantSpec(Raw(type: Float16), uuid=823), )] (%1416:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=822)]) -> (%1417:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=823)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=823), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=824), )] (%1417:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=823)]) -> (%1418:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=824)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=824), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, 
quant_to_type: Int8, scale_type: Float32), uuid=824), )] (%1418:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=824)]) -> (%1419:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=824)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=819), outputs_0:QuantSpec(Raw(type: Float16), uuid=825), )] (%1412:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=819)]) -> (%1420:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=825)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=825), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=826), )] (%1420:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=825)]) -> (%1421:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=826)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=28), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=824), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=28), )] (%370:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: 
Float32), uuid=28)], %1419:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=824)]) -> (%1422:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=28)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=56), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=826), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=56), )] (%371:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=56)], %1421:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=826)]) -> (%1423:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=56)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=28), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=28), )] (%1422:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=28)]) -> (%1424:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=28)]) - linalg.CPU.RepeatOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=56), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=56), )] (%1423:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=56)]) -> (%1425:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=56)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=821), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=28), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=827), )] (%1415:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=821)], %1424:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=28)]) -> (%1426:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=827)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=827), inputs_1:QuantSpec(Raw(type: Float32), uuid=828), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=827), )] (%1426:tensor<[1, 16, 32, 1024], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=827)], %1427:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=828), constant:[0.088388346]]) -> (%1428:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=827)]) - linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=827), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=829), )] (%1428:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=827)]) -> (%1429:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=829)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=829), inputs_1:QuantSpec(Raw(type: Int16), uuid=830), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=829), )] (%1429:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=829)], %1430:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=830), constant:[-20]]) -> (%1431:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=829)]) - linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: 
UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=831), outputs_0:QuantSpec(Raw(type: UInt8), uuid=832), )] (%319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %1432:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=831), constant:[-0.9921875]]) -> (%1433:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=832)]) - linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=832), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=827), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=829), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=829), )] (%1433:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=832)], %1428:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=827)], %1431:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=829)]) -> (%1434:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=829)]) - linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=829), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=833), )] (%1434:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, 
scale_type: Float32), uuid=829)]) -> (%1435:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=833)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=833), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=56), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=834), )] (%1435:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=833)], %1425:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=56)]) -> (%1436:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=834)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=834), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=834), )] (%1436:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=834)]) -> (%1437:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=834)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: 
Int16, scale_type: Float32), uuid=834), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=834), )] (%1437:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=834)]) -> (%1437:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=834)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=834), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=836), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=835))] (%1437:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=834)]) -> (%1438:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=836)]) - cf.ReturnOp (%1438:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=836)], %1419:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=824)], %1421:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=826)]) -> () - } - } - graph.SubGraphOp @model.layers.25.mlp [using_qnn:true, 
symbol:model.layers.25.mlp] { - (%1440:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=837)]) -> (%1445:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=844)]) { - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=837), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=839), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=838))] (%1440:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=837)]) -> (%1441:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=839)]) - linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=839), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=840), )] (%1441:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=839)]) -> (%1442:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=840)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=837), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=842), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=841))] (%1440:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=837)]) -> (%1443:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=842)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=840), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=842), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=840), )] (%1442:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=840)], %1443:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=842)]) -> (%1444:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=840)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=840), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=844), 
weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=843))] (%1444:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=840)]) -> (%1445:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=844)]) - cf.ReturnOp (%1445:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=844)]) -> () - } - } - graph.SubGraphOp @model.layers.26 [using_qnn:true, symbol:model.layers.26] { - (%1446:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=844)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %372:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=29)], %373:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=57)]) -> (%1487:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=874)], %1460:tensor<[1, 8, 128, 
32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=854)], %1462:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=856)]) { - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=844), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=845), )] (%1446:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=844)]) -> (%1447:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=845)]) - graph.CallGraphOp @model.layers.26.self_attn (%1447:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=845)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %372:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=29)], %373:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), 
uuid=57)]) -> (%1479:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=866)], %1460:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=854)], %1462:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=856)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=866), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=844), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=866), )] (%1479:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=866)], %1446:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=844)]) -> (%1480:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=866)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=866), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=867), )] (%1480:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=866)]) -> 
(%1481:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=867)]) - graph.CallGraphOp @model.layers.26.mlp (%1481:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=867)]) -> (%1486:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=874)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=874), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=866), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=874), )] (%1486:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=874)], %1480:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=866)]) -> (%1487:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=874)]) - cf.ReturnOp (%1487:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=874)], %1460:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=854)], %1462:tensor<[1, 8, 32, 128], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=856)]) -> () - } - } - graph.SubGraphOp @model.layers.26.self_attn [using_qnn:true, symbol:model.layers.26.self_attn] { - (%1447:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=845)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %372:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=29)], %373:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=57)]) -> (%1479:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=866)], %1460:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=854)], %1462:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=856)]) { - linalg.CPU.LinearOp (%1447:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=845)]) -> (%1448:tensor<[1, 32, 2048], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=850)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=845), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=847), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=846))] (%1447:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=845)]) -> (%1449:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=847)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=845), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=849), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=848))] (%1447:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=845)]) -> (%1450:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=849)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=850), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=850), )] (%1448:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: 
Int16PerTensor), uuid=850)]) -> (%1448:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=850)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=850), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=850), )] (%1448:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=850)]) -> (%1451:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=850)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=847), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=847), )] (%1449:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=847)]) -> (%1449:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=847)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=847), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=847), )] (%1449:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=847)]) -> (%1452:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=847)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, 
quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=849), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=849), )] (%1450:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=849)]) -> (%1450:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=849)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=849), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=849), )] (%1450:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=849)]) -> (%1453:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=849)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=850), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=851), )] (%1451:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=850)]) -> (%1454:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=851)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=847), outputs_0:QuantSpec(SymPerTensor(quant_min: 
-32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=852), )] (%1452:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=847)]) -> (%1455:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=852)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=851), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=851), )] (%1454:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=851)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%1456:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=851)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=852), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: 
Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=852), )] (%1455:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=852)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%1457:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=852)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=852), outputs_0:QuantSpec(Raw(type: Float16), uuid=853), )] (%1457:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=852)]) -> (%1458:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=853)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=853), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=854), )] (%1458:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=853)]) -> (%1459:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), 
uuid=854)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=854), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=854), )] (%1459:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=854)]) -> (%1460:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=854)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=849), outputs_0:QuantSpec(Raw(type: Float16), uuid=855), )] (%1453:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=849)]) -> (%1461:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=855)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=855), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=856), )] (%1461:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=855)]) -> (%1462:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=856)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=29), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=854), 
outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=29), )] (%372:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=29)], %1460:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=854)]) -> (%1463:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=29)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=57), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=856), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=57), )] (%373:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=57)], %1462:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=856)]) -> (%1464:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=57)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=29), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=29), )] (%1463:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 
127, quant_to_type: Int8, scale_type: Float32), uuid=29)]) -> (%1465:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=29)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=57), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=57), )] (%1464:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=57)]) -> (%1466:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=57)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=851), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=29), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=857), )] (%1456:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=851)], %1465:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=29)]) -> (%1467:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=857)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, 
scale_type: Float32), uuid=857), inputs_1:QuantSpec(Raw(type: Float32), uuid=858), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=857), )] (%1467:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=857)], %1468:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=858), constant:[0.088388346]]) -> (%1469:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=857)]) - linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=857), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=859), )] (%1469:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=857)]) -> (%1470:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=859)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=859), inputs_1:QuantSpec(Raw(type: Int16), uuid=860), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=859), )] (%1470:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=859)], %1471:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=860), constant:[-20]]) -> 
(%1472:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=859)]) - linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=861), outputs_0:QuantSpec(Raw(type: UInt8), uuid=862), )] (%319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %1473:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=861), constant:[0.27929688]]) -> (%1474:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=862)]) - linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=862), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=857), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=859), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=859), )] (%1474:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=862)], %1469:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=857)], %1472:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=859)]) -> (%1475:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=859)]) - linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=859), 
outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=863), )] (%1475:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=859)]) -> (%1476:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=863)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=863), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=57), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=864), )] (%1476:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=863)], %1466:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=57)]) -> (%1477:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=864)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=864), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=864), )] (%1477:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=864)]) -> (%1478:tensor<[1, 32, 16, 128], 
Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=864)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=864), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=864), )] (%1478:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=864)]) -> (%1478:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=864)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=864), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=866), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=865))] (%1478:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=864)]) -> (%1479:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=866)]) - cf.ReturnOp (%1479:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=866)], %1460:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: 
Float32), uuid=854)], %1462:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=856)]) -> () - } - } - graph.SubGraphOp @model.layers.26.mlp [using_qnn:true, symbol:model.layers.26.mlp] { - (%1481:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=867)]) -> (%1486:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=874)]) { - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=867), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=869), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=868))] (%1481:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=867)]) -> (%1482:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=869)]) - linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=869), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=870), )] (%1482:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=869)]) -> (%1483:tensor<[1, 
32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=870)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=867), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=872), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=871))] (%1481:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=867)]) -> (%1484:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=872)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=870), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=872), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=870), )] (%1483:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=870)], %1484:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=872)]) -> (%1485:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=870)]) - linalg.CPU.LinearOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=870), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=874), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=873))] (%1485:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=870)]) -> (%1486:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=874)]) - cf.ReturnOp (%1486:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=874)]) -> () - } - } - graph.SubGraphOp @model.layers.27 [using_qnn:true, symbol:model.layers.27] { - (%1487:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=874)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %374:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=30)], %375:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, 
quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=58)]) -> (%1528:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=904)], %1501:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=884)], %1503:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=886)]) { - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=874), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=875), )] (%1487:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=874)]) -> (%1488:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=875)]) - graph.CallGraphOp @model.layers.27.self_attn (%1488:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=875)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %374:tensor<[1, 8, 128, 992], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=30)], %375:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=58)]) -> (%1520:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=896)], %1501:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=884)], %1503:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=886)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=896), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=874), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=896), )] (%1520:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=896)], %1487:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=874)]) -> (%1521:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=896)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=896), 
outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=897), )] (%1521:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=896)]) -> (%1522:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=897)]) - graph.CallGraphOp @model.layers.27.mlp (%1522:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=897)]) -> (%1527:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=904)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=904), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=896), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=904), )] (%1527:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=904)], %1521:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=896)]) -> (%1528:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=904)]) - cf.ReturnOp (%1528:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, 
quant_to_type: Int16, scale_type: Float32), uuid=904)], %1501:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=884)], %1503:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=886)]) -> () - } - } - graph.SubGraphOp @model.layers.27.self_attn [using_qnn:true, symbol:model.layers.27.self_attn] { - (%1488:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=875)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)], %319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %374:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=30)], %375:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=58)]) -> (%1520:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=896)], %1501:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=884)], %1503:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: 
Float32), uuid=886)]) { - linalg.CPU.LinearOp (%1488:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=875)]) -> (%1489:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=880)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=875), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=877), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=876))] (%1488:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=875)]) -> (%1490:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=877)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=875), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=879), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=878))] (%1488:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=875)]) -> (%1491:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), 
uuid=879)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=880), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=880), )] (%1489:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=880)]) -> (%1489:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=880)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=880), outputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=880), )] (%1489:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=880)]) -> (%1492:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=880)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=877), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=877), )] (%1490:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=877)]) -> (%1490:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=877)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=877), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=877), )] (%1490:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=877)]) -> (%1493:tensor<[1, 8, 32, 
128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=877)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=879), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=879), )] (%1491:tensor<[1, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=879)]) -> (%1491:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=879)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=879), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=879), )] (%1491:tensor<[1, 32, 8, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=879)]) -> (%1494:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=879)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int16PerTensor), uuid=880), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=881), )] (%1492:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(Raw(type: Int16PerTensor), uuid=880)]) -> (%1495:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, 
scale_type: Float32), uuid=881)]) - linalg.CPU.RMSNormOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=877), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=882), )] (%1493:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=877)]) -> (%1496:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=882)]) - linalg.CPU.RoPEOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=881), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=881), )] (%1495:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=881)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%1497:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=881)]) - linalg.CPU.RoPEOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=882), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=882), )] (%1496:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=882)], %379:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=62)], %380:tensor<[1, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=64)]) -> (%1498:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=882)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=882), outputs_0:QuantSpec(Raw(type: Float16), uuid=883), )] (%1498:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=882)]) -> (%1499:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=883)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=883), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=884), )] 
(%1499:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=883)]) -> (%1500:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=884)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=884), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=884), )] (%1500:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=884)]) -> (%1501:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=884)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=879), outputs_0:QuantSpec(Raw(type: Float16), uuid=885), )] (%1494:tensor<[1, 8, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=879)]) -> (%1502:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=885)]) - linalg.CPU.CastTypeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=885), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=886), )] (%1502:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=885)]) -> (%1503:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=886)]) - linalg.CPU.ConcatOp 
[quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=30), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=884), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=30), )] (%374:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=30)], %1501:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=884)]) -> (%1504:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=30)]) - linalg.CPU.ConcatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=58), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=886), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=58), )] (%375:tensor<[1, 8, 992, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=58)], %1503:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=886)]) -> (%1505:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=58)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, 
scale_type: Float32), uuid=30), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=30), )] (%1504:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=30)]) -> (%1506:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=30)]) - linalg.CPU.RepeatOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=58), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=58), )] (%1505:tensor<[1, 8, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=58)]) -> (%1507:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=58)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=881), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=30), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=887), )] (%1497:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=881)], %1506:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=30)]) -> (%1508:tensor<[1, 16, 32, 1024], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=887)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=887), inputs_1:QuantSpec(Raw(type: Float32), uuid=888), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=887), )] (%1508:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=887)], %1509:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=888), constant:[0.088388346]]) -> (%1510:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=887)]) - linalg.CPU.ReduceMinOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=887), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=889), )] (%1510:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=887)]) -> (%1511:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=889)]) - linalg.CPU.AddOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=889), inputs_1:QuantSpec(Raw(type: Int16), uuid=890), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=889), )] 
(%1511:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=889)], %1512:tensor<[1], Int16, CPU>[quant_recipe:QuantSpec(Raw(type: Int16), uuid=890), constant:[-20]]) -> (%1513:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=889)]) - linalg.CPU.EqualOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2), inputs_1:QuantSpec(Raw(type: UInt16), uuid=891), outputs_0:QuantSpec(Raw(type: UInt8), uuid=892), )] (%319:tensor<[1, 1, 32, 1024], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %1514:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=891), constant:[0.890625]]) -> (%1515:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=892)]) - linalg.CPU.WhereOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=892), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=887), inputs_2:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=889), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=889), )] (%1515:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=892)], %1510:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=887)], %1513:tensor<[1, 16, 32, 1], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=889)]) -> (%1516:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 
-32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=889)]) - linalg.CPU.SoftmaxOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=889), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=893), )] (%1516:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=889)]) -> (%1517:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=893)]) - linalg.CPU.MatMulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=893), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=58), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=894), )] (%1517:tensor<[1, 16, 32, 1024], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=893)], %1507:tensor<[1, 16, 1024, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=58)]) -> (%1518:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=894)]) - linalg.CPU.TransposeOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=894), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, 
scale_type: Float32), uuid=894), )] (%1518:tensor<[1, 16, 32, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=894)]) -> (%1519:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=894)]) - linalg.CPU.ViewOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=894), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=894), )] (%1519:tensor<[1, 32, 16, 128], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=894)]) -> (%1519:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=894)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=894), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=896), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=895))] (%1519:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=894)]) -> (%1520:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=896)]) - cf.ReturnOp (%1520:tensor<[1, 32, 2048], Int16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=896)], %1501:tensor<[1, 8, 128, 32], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=884)], %1503:tensor<[1, 8, 32, 128], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=886)]) -> () - } - } - graph.SubGraphOp @model.layers.27.mlp [using_qnn:true, symbol:model.layers.27.mlp] { - (%1522:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=897)]) -> (%1527:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=904)]) { - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=897), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=899), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=898))] (%1522:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=897)]) -> (%1523:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=899)]) - linalg.CPU.SiLUOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=899), outputs_0:QuantSpec(SymPerTensor(quant_min: 
-32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=900), )] (%1523:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=899)]) -> (%1524:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=900)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=897), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=902), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=901))] (%1522:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=897)]) -> (%1525:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=902)]) - linalg.CPU.MulOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=900), inputs_1:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=902), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=900), )] (%1524:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=900)], %1525:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 
32767, quant_to_type: Int16, scale_type: Float32), uuid=902)]) -> (%1526:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=900)]) - linalg.CPU.LinearOp [quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=900), outputs_0:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=904), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=903))] (%1526:tensor<[1, 32, 6144], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=900)]) -> (%1527:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=904)]) - cf.ReturnOp (%1527:tensor<[1, 32, 2048], Int16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -32768, quant_max: 32767, quant_to_type: Int16, scale_type: Float32), uuid=904)]) -> () + (%8206:tensor<[1, 32], Int32, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(Raw(type: Int32), uuid=0)], %8264:tensor<[32], Int32, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(Raw(type: Int32), uuid=1)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8208:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=3)], %8210:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), 
uuid=4)], %8212:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=5)], %8214:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=6)], %8216:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=7)], %8218:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=8)], %8220:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=9)], %8222:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=10)], %8224:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=11)], %8226:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=12)], %8228:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=13)], %8230:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=14)], %8232:tensor<[1, 8, 128, 
992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=15)], %8234:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=16)], %8236:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=17)], %8238:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=18)], %8240:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=19)], %8242:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=20)], %8244:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=21)], %8246:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=22)], %8248:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=23)], %8250:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=24)], %8252:tensor<[1, 8, 128, 992], Int8PerTensor, 
CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=25)], %8254:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=26)], %8256:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=27)], %8258:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=28)], %8260:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=29)], %8262:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=30)], %8209:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=31)], %8211:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=32)], %8213:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=33)], %8215:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=34)], %8217:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, 
quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=35)], %8219:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=36)], %8221:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=37)], %8223:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=38)], %8225:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=39)], %8227:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=40)], %8229:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=41)], %8231:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=42)], %8233:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=43)], %8235:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=44)], %8237:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, 
quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=45)], %8239:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=46)], %8241:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=47)], %8243:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=48)], %8245:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=49)], %8247:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=50)], %8249:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=51)], %8251:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=52)], %8253:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=53)], %8255:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=54)], %8257:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, 
quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=55)], %8259:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=56)], %8261:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=57)], %8263:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=58)]) -> (%9726:tensor<[1, 32, 151936], UInt16PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1049)], %8291:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=79)], %8343:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=114)], %8395:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=149)], %8447:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=184)], %8499:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=219)], %8551:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, 
quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=254)], %8603:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=289)], %8655:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=324)], %8707:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=359)], %8759:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=394)], %8811:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=429)], %8863:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=464)], %8915:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=499)], %8967:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=534)], %9019:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=569)], %9071:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, 
quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=604)], %9123:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=639)], %9175:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=674)], %9227:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=709)], %9279:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=744)], %9331:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=779)], %9383:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=814)], %9435:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=849)], %9487:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=884)], %9539:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=919)], %9591:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, 
quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=954)], %9643:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=989)], %9695:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1024)], %8293:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=81)], %8345:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=116)], %8397:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=151)], %8449:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=186)], %8501:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=221)], %8553:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=256)], %8605:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=291)], %8657:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, 
quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=326)], %8709:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=361)], %8761:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=396)], %8813:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=431)], %8865:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=466)], %8917:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=501)], %8969:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=536)], %9021:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=571)], %9073:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=606)], %9125:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=641)], %9177:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, 
quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=676)], %9229:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=711)], %9281:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=746)], %9333:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=781)], %9385:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=816)], %9437:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=851)], %9489:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=886)], %9541:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=921)], %9593:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=956)], %9645:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=991)], %9697:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, 
quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1026)]) { + graph.CallGraphOp @model.0.s32 (%8206:tensor<[1, 32], Int32, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(Raw(type: Int32), uuid=0)], %8264:tensor<[32], Int32, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(Raw(type: Int32), uuid=1)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8208:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=3)], %8210:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=4)], %8212:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=5)], %8214:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=6)], %8216:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=7)], %8218:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=8)], %8220:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=9)], %8222:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, 
quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=10)], %8224:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=11)], %8226:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=12)], %8228:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=13)], %8230:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=14)], %8232:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=15)], %8234:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=16)], %8236:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=17)], %8238:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=18)], %8240:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=19)], %8242:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, 
quant_to_type: Int8, scale_type: Float32), uuid=20)], %8244:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=21)], %8246:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=22)], %8248:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=23)], %8250:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=24)], %8252:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=25)], %8254:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=26)], %8256:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=27)], %8258:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=28)], %8260:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=29)], %8262:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, 
scale_type: Float32), uuid=30)], %8209:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=31)], %8211:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=32)], %8213:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=33)], %8215:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=34)], %8217:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=35)], %8219:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=36)], %8221:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=37)], %8223:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=38)], %8225:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=39)], %8227:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=40)], 
%8229:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=41)], %8231:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=42)], %8233:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=43)], %8235:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=44)], %8237:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=45)], %8239:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=46)], %8241:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=47)], %8243:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=48)], %8245:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=49)], %8247:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=50)], %8249:tensor<[1, 8, 992, 128], 
UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=51)], %8251:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=52)], %8253:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=53)], %8255:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=54)], %8257:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=55)], %8259:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=56)], %8261:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=57)], %8263:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=58)]) -> (%9726:tensor<[1, 32, 151936], UInt16PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1049)], %8291:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=79)], %8343:tensor<[1, 8, 128, 32], 
UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=114)], %8395:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=149)], %8447:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=184)], %8499:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=219)], %8551:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=254)], %8603:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=289)], %8655:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=324)], %8707:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=359)], %8759:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=394)], %8811:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=429)], %8863:tensor<[1, 8, 128, 32], UInt8PerTensor, 
CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=464)], %8915:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=499)], %8967:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=534)], %9019:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=569)], %9071:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=604)], %9123:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=639)], %9175:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=674)], %9227:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=709)], %9279:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=744)], %9331:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=779)], %9383:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, 
quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=814)], %9435:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=849)], %9487:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=884)], %9539:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=919)], %9591:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=954)], %9643:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=989)], %9695:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1024)], %8293:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=81)], %8345:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=116)], %8397:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=151)], %8449:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, 
quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=186)], %8501:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=221)], %8553:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=256)], %8605:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=291)], %8657:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=326)], %8709:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=361)], %8761:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=396)], %8813:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=431)], %8865:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=466)], %8917:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=501)], %8969:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, 
quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=536)], %9021:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=571)], %9073:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=606)], %9125:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=641)], %9177:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=676)], %9229:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=711)], %9281:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=746)], %9333:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=781)], %9385:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=816)], %9437:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=851)], %9489:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, 
quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=886)], %9541:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=921)], %9593:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=956)], %9645:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=991)], %9697:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1026)]) + cf.ReturnOp (%9726:tensor<[1, 32, 151936], UInt16PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1049)], %8291:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=79)], %8343:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=114)], %8395:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=149)], %8447:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=184)], %8499:tensor<[1, 8, 128, 32], UInt8PerTensor, 
CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=219)], %8551:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=254)], %8603:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=289)], %8655:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=324)], %8707:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=359)], %8759:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=394)], %8811:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=429)], %8863:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=464)], %8915:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=499)], %8967:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=534)], %9019:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, 
quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=569)], %9071:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=604)], %9123:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=639)], %9175:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=674)], %9227:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=709)], %9279:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=744)], %9331:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=779)], %9383:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=814)], %9435:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=849)], %9487:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=884)], %9539:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, 
quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=919)], %9591:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=954)], %9643:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=989)], %9695:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1024)], %8293:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=81)], %8345:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=116)], %8397:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=151)], %8449:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=186)], %8501:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=221)], %8553:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=256)], %8605:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, 
quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=291)], %8657:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=326)], %8709:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=361)], %8761:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=396)], %8813:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=431)], %8865:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=466)], %8917:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=501)], %8969:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=536)], %9021:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=571)], %9073:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=606)], %9125:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, 
quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=641)], %9177:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=676)], %9229:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=711)], %9281:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=746)], %9333:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=781)], %9385:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=816)], %9437:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=851)], %9489:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=886)], %9541:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=921)], %9593:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=956)], %9645:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, 
quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=991)], %9697:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1026)]) -> () } } // ╔═════╗ @@ -1905,5 +12,1612 @@ // ╚═════╝ // ║ ║ // ╱╩╦╦╩╲ + graph.SubGraphOp @model.0.s32 [use_qnn:true, symbol:model.0.s32] { + (%8206:tensor<[1, 32], Int32, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(Raw(type: Int32), uuid=0)], %8264:tensor<[32], Int32, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(Raw(type: Int32), uuid=1)], %8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8208:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=3)], %8210:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=4)], %8212:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=5)], %8214:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=6)], %8216:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=7)], %8218:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=8)], %8220:tensor<[1, 8, 128, 
992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=9)], %8222:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=10)], %8224:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=11)], %8226:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=12)], %8228:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=13)], %8230:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=14)], %8232:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=15)], %8234:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=16)], %8236:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=17)], %8238:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=18)], %8240:tensor<[1, 8, 128, 992], Int8PerTensor, 
CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=19)], %8242:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=20)], %8244:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=21)], %8246:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=22)], %8248:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=23)], %8250:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=24)], %8252:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=25)], %8254:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=26)], %8256:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=27)], %8258:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=28)], %8260:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, 
quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=29)], %8262:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=30)], %8209:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=31)], %8211:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=32)], %8213:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=33)], %8215:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=34)], %8217:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=35)], %8219:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=36)], %8221:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=37)], %8223:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=38)], %8225:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, 
quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=39)], %8227:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=40)], %8229:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=41)], %8231:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=42)], %8233:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=43)], %8235:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=44)], %8237:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=45)], %8239:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=46)], %8241:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=47)], %8243:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=48)], %8245:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, 
quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=49)], %8247:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=50)], %8249:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=51)], %8251:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=52)], %8253:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=53)], %8255:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=54)], %8257:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=55)], %8259:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=56)], %8261:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=57)], %8263:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=58)]) -> (%9726:tensor<[1, 32, 151936], UInt16PerTensor, CPU>[qnn_graph_outputs:true, 
quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1049)], %8291:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=79)], %8343:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=114)], %8395:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=149)], %8447:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=184)], %8499:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=219)], %8551:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=254)], %8603:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=289)], %8655:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=324)], %8707:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=359)], %8759:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, 
quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=394)], %8811:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=429)], %8863:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=464)], %8915:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=499)], %8967:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=534)], %9019:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=569)], %9071:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=604)], %9123:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=639)], %9175:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=674)], %9227:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=709)], %9279:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, 
quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=744)], %9331:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=779)], %9383:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=814)], %9435:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=849)], %9487:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=884)], %9539:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=919)], %9591:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=954)], %9643:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=989)], %9695:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1024)], %8293:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=81)], %8345:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, 
quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=116)], %8397:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=151)], %8449:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=186)], %8501:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=221)], %8553:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=256)], %8605:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=291)], %8657:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=326)], %8709:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=361)], %8761:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=396)], %8813:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=431)], %8865:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, 
quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=466)], %8917:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=501)], %8969:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=536)], %9021:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=571)], %9073:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=606)], %9125:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=641)], %9177:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=676)], %9229:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=711)], %9281:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=746)], %9333:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=781)], %9385:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, 
quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=816)], %9437:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=851)], %9489:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=886)], %9541:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=921)], %9593:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=956)], %9645:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=991)], %9697:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1026)]) { + linalg.CPU.EmbeddingOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0] (%8206:tensor<[1, 32], Int32, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(Raw(type: Int32), uuid=0)]) -> (%8265:tensor<[1, 32, 2048], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=59)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float32), uuid=59, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), weight_weight:QuantSpec(Raw(type: Float32), uuid=61, solved=0))] 
(%8265:tensor<[1, 32, 2048], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=59)]) -> (%8266:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + linalg.CPU.IndexOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=62, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), )] (%8204:tensor<[1024, 5, 128], UInt16PerTensor, CPU>[@rope_sin][quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=62), symbol:rope_sin]) -> (%8267:tensor<[1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) + linalg.CPU.IndexOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=64, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), )] (%8205:tensor<[1024, 5, 128], UInt16PerTensor, CPU>[@rope_cos][quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=64), symbol:rope_cos]) -> (%8268:tensor<[1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, 
scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=66, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=67, solved=0))] (%8266:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%8269:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=66)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=66, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=69, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=68, solved=0))] (%8269:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=66)]) -> (%8270:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=69)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: 
Float32, zero_point_type: Int32), uuid=66, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=71, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=70, solved=0))] (%8269:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=66)]) -> (%8271:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=71)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=66, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=73, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=72, solved=0))] (%8269:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=66)]) -> (%8272:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=73)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: 
Float32, zero_point_type: Int32), uuid=69, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=69, solved=0), )] (%8270:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=69)]) -> (%8270:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=69)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=69, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=69, solved=0), )] (%8270:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=69)]) -> (%8273:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=69)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=71, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=71, solved=0), )] (%8271:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, 
quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=71)]) -> (%8271:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=71)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=71, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=71, solved=0), )] (%8271:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=71)]) -> (%8274:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=71)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=73, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=73, solved=0), )] (%8272:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=73)]) -> (%8272:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=73)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, 
qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=73, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=73, solved=0), )] (%8272:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=73)]) -> (%8275:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=73)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=69, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=74, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=0))] (%8273:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=69)]) -> (%8276:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=74)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: 
Int32), uuid=71, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=0))] (%8274:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=71)]) -> (%8277:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), )] (%8268:tensor<[1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), )] (%8267:tensor<[1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: 
Float32), uuid=63)]) -> (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=74, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=74, solved=0), )] (%8276:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=74)]) -> (%8276:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=74)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=74, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=74, solved=0), )] (%8276:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=74)]) -> (%8276:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=74)]) + linalg.CPU.NegOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, 
quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=74, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=74, solved=0), )] (%8276:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=74)]) -> (%8278:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=74)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=74, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=74, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=74, solved=0), )] (%8278:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=74)], %8276:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=74)]) -> (%8279:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=74)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, 
qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=74, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=74, solved=0), )] (%8279:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=74)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%8280:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=74)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=74, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=74, solved=0), )] (%8276:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=74)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), 
uuid=65)]) -> (%8281:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=74)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=74, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=74, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=74, solved=0), )] (%8281:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=74)], %8280:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=74)]) -> (%8282:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=74)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=0), )] (%8277:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=76)]) -> (%8277:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=0), )] (%8277:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76)]) -> (%8277:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76)]) + linalg.CPU.NegOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=0), )] (%8277:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76)]) -> (%8283:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, 
quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=0), )] (%8283:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76)], %8277:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76)]) -> (%8284:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=0), )] (%8284:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, 
scale_type: Float32), uuid=63)]) -> (%8285:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=0), )] (%8277:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8286:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=0), )] (%8286:tensor<[1, 8, 32, 128], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76)], %8285:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76)]) -> (%8287:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=78, solved=0), )] (%8287:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76)]) -> (%8288:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=78)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=78, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=79, solved=0), )] (%8288:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=78)]) -> (%8289:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=79)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=79, 
solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=79, solved=0), )] (%8289:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=79)]) -> (%8291:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=79)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=73, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=80, solved=0), )] (%8275:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=73)]) -> (%8292:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=80)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=80, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=81, solved=0), )] (%8292:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=80)]) -> (%8293:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=81)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=3, solved=0), 
inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=79, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=3, solved=0), )] (%8208:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=3)], %8291:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=79)]) -> (%8295:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=3)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=31, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=81, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=31, solved=0), )] (%8209:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=31)], %8293:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=81)]) -> (%8296:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=31)]) + linalg.CPU.RepeatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, 
quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=3, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=3, solved=0), )] (%8295:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=3)]) -> (%8297:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=3)]) + linalg.CPU.RepeatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=31, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=31, solved=0), )] (%8296:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=31)]) -> (%8298:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=31)]) + linalg.CPU.MatMulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=74, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=3, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=82, solved=0), )] (%8282:tensor<[1, 16, 32, 128], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=74)], %8297:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=3)]) -> (%8299:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=82)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=82, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=83, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=82, solved=0), )] (%8299:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=82)], %8300:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=83), constant:[0.088388346]]) -> (%8301:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=82)]) + linalg.CPU.ReduceMinOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=82, solved=0), 
outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=84, solved=0), )] (%8301:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=82)]) -> (%8302:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=84)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=84, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=85, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=84, solved=0), )] (%8302:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=84)], %8303:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=85), constant:[-20]]) -> (%8304:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=84)]) + linalg.CPU.EqualOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2, solved=0), inputs_1:QuantSpec(Raw(type: UInt16), uuid=86, solved=0), outputs_0:QuantSpec(Raw(type: UInt8), 
uuid=87, solved=0), )] (%8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8305:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=86), constant:[0]]) -> (%8306:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=87)]) + linalg.CPU.WhereOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=87, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=82, solved=0), inputs_2:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=84, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=84, solved=0), )] (%8306:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=87)], %8301:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=82)], %8304:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=84)]) -> (%8307:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=84)]) + linalg.CPU.SoftmaxOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=84, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 
65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=88, solved=0), )] (%8307:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=84)]) -> (%8308:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=88)]) + linalg.CPU.MatMulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=88, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=31, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=89, solved=0), )] (%8308:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=88)], %8298:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=31)]) -> (%8309:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=89)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=89, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, 
scale_type: Float32, zero_point_type: Int32), uuid=89, solved=0), )] (%8309:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=89)]) -> (%8310:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=89)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=89, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=89, solved=0), )] (%8310:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=89)]) -> (%8310:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=89)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=89, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=91, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=90, solved=0))] (%8310:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 
0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=89)]) -> (%8311:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=91)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=91, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%8266:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8311:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=91)]) -> (%8312:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=92, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: 
Float32, zero_point_type: Int32), uuid=93, solved=0))] (%8312:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%8313:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=92)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=92, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=95, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=94, solved=0))] (%8313:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=92)]) -> (%8314:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=95)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=92, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=97, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, 
quant_to_type: UInt4, scale_1_type: Float32), uuid=96, solved=0))] (%8313:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=92)]) -> (%8315:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=97)]) + linalg.CPU.SigmoidOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=97, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=98, solved=0), )] (%8315:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=97)]) -> (%8316:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=98)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=97, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=98, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=97, solved=0), )] (%8315:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: 
UInt16, scale_type: Float32, zero_point_type: Int32), uuid=97)], %8316:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=98)]) -> (%8317:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=97)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=97, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=95, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=97, solved=0), )] (%8317:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=97)], %8314:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=95)]) -> (%8318:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=97)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=97, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, 
scale_type: Float32, zero_point_type: Int32), uuid=100, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=99, solved=0))] (%8318:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=97)]) -> (%8319:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=100)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=100, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%8312:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8319:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=100)]) -> (%8320:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 
65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=101, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=102, solved=0))] (%8320:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%8321:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=101)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=101, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=104, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=103, solved=0))] (%8321:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=101)]) -> (%8322:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=104)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, 
quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=101, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=106, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=105, solved=0))] (%8321:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=101)]) -> (%8323:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=106)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=101, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=108, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=107, solved=0))] (%8321:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=101)]) -> (%8324:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=108)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, 
quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=104, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=104, solved=0), )] (%8322:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=104)]) -> (%8322:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=104)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=104, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=104, solved=0), )] (%8322:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=104)]) -> (%8325:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=104)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=106, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=106, solved=0), )] (%8323:tensor<[1, 32, 1024], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=106)]) -> (%8323:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=106)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=106, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=106, solved=0), )] (%8323:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=106)]) -> (%8326:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=106)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=108, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=108, solved=0), )] (%8324:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=108)]) -> (%8324:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=108)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=108, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=108, solved=0), )] (%8324:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=108)]) -> (%8327:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=108)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=104, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=109, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=110, solved=0))] (%8325:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=104)]) -> (%8328:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=109)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, 
quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=106, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=111, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=112, solved=0))] (%8326:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=106)]) -> (%8329:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=111)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), )] (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), 
)] (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=109, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=109, solved=0), )] (%8328:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=109)]) -> (%8328:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=109)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=109, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=109, solved=0), )] (%8328:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=109)]) -> (%8328:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=109)]) + linalg.CPU.NegOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=109, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=109, solved=0), )] (%8328:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=109)]) -> (%8330:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=109)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=109, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=109, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=109, solved=0), )] (%8330:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=109)], %8328:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=109)]) -> (%8331:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, 
scale_type: Float32, zero_point_type: Int32), uuid=109)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=109, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=109, solved=0), )] (%8331:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=109)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%8332:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=109)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=109, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=109, solved=0), )] (%8328:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=109)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8333:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=109)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=109, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=109, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=109, solved=0), )] (%8333:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=109)], %8332:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=109)]) -> (%8334:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=109)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=111, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=111, solved=0), )] (%8329:tensor<[1, 8, 32, 128], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=111)]) -> (%8329:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=111)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=111, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=111, solved=0), )] (%8329:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=111)]) -> (%8329:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=111)]) + linalg.CPU.NegOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=111, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=111, solved=0), )] (%8329:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=111)]) -> (%8335:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: 
Int32), uuid=111)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=111, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=111, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=111, solved=0), )] (%8335:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=111)], %8329:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=111)]) -> (%8336:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=111)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=111, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=111, solved=0), )] (%8336:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=111)], %8267:tensor<[1, 1, 32, 128], 
UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%8337:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=111)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=111, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=111, solved=0), )] (%8329:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=111)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8338:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=111)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=111, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=111, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, 
scale_type: Float32, zero_point_type: Int32), uuid=111, solved=0), )] (%8338:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=111)], %8337:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=111)]) -> (%8339:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=111)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=111, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=113, solved=0), )] (%8339:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=111)]) -> (%8340:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=113)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=113, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=114, solved=0), )] (%8340:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=113)]) -> (%8341:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=114)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, 
quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=114, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=114, solved=0), )] (%8341:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=114)]) -> (%8343:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=114)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=108, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=115, solved=0), )] (%8327:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=108)]) -> (%8344:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=115)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=115, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=116, solved=0), )] (%8344:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=115)]) -> (%8345:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=116)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, 
quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=4, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=114, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=4, solved=0), )] (%8210:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=4)], %8343:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=114)]) -> (%8347:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=4)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=32, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=116, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=32, solved=0), )] (%8211:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=32)], %8345:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=116)]) -> (%8348:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, 
quant_to_type: UInt8, scale_type: Float32), uuid=32)]) + linalg.CPU.RepeatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=4, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=4, solved=0), )] (%8347:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=4)]) -> (%8349:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=4)]) + linalg.CPU.RepeatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=32, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=32, solved=0), )] (%8348:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=32)]) -> (%8350:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=32)]) + linalg.CPU.MatMulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=109, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=4, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=117, solved=0), )] (%8334:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=109)], %8349:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=4)]) -> (%8351:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=117)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=117, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=118, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=117, solved=0), )] (%8351:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=117)], %8352:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=118), constant:[0.088388346]]) -> (%8353:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=117)]) + linalg.CPU.ReduceMinOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 
65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=117, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=119, solved=0), )] (%8353:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=117)]) -> (%8354:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=119)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=119, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=120, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=119, solved=0), )] (%8354:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=119)], %8355:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=120), constant:[-20]]) -> (%8356:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=119)]) + linalg.CPU.EqualOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2, 
solved=0), inputs_1:QuantSpec(Raw(type: UInt16), uuid=121, solved=0), outputs_0:QuantSpec(Raw(type: UInt8), uuid=122, solved=0), )] (%8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8357:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=121), constant:[0]]) -> (%8358:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=122)]) + linalg.CPU.WhereOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=122, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=117, solved=0), inputs_2:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=119, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=119, solved=0), )] (%8358:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=122)], %8353:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=117)], %8356:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=119)]) -> (%8359:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=119)]) + linalg.CPU.SoftmaxOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, 
scale_type: Float32, zero_point_type: Int32), uuid=119, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=123, solved=0), )] (%8359:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=119)]) -> (%8360:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=123)]) + linalg.CPU.MatMulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=123, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=32, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=124, solved=0), )] (%8360:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=123)], %8350:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=32)]) -> (%8361:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=124)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=124, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=124, solved=0), )] (%8361:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=124)]) -> (%8362:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=124)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=124, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=124, solved=0), )] (%8362:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=124)]) -> (%8362:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=124)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=124, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=126, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, 
scale_1_type: Float32), uuid=125, solved=0))] (%8362:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=124)]) -> (%8363:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=126)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=126, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%8320:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8363:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=126)]) -> (%8364:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=127, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=128, solved=0))] (%8364:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%8365:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=127)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=127, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=130, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=129, solved=0))] (%8365:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=127)]) -> (%8366:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=130)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=127, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=132, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=131, solved=0))] (%8365:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=127)]) -> (%8367:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=132)]) + linalg.CPU.SigmoidOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=132, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=133, solved=0), )] (%8367:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=132)]) -> (%8368:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=133)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=132, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=133, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=132, solved=0), )] (%8367:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=132)], %8368:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=133)]) -> (%8369:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=132)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=132, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=130, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=132, solved=0), )] (%8369:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=132)], %8366:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=130)]) -> (%8370:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=132)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 
65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=132, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=135, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=134, solved=0))] (%8370:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=132)]) -> (%8371:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=135)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=135, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%8364:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8371:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=135)]) -> (%8372:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: 
Int32), uuid=60)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=136, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=137, solved=0))] (%8372:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%8373:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=136)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=136, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=139, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=138, solved=0))] (%8373:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=136)]) -> (%8374:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), 
uuid=139)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=136, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=141, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=140, solved=0))] (%8373:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=136)]) -> (%8375:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=141)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=136, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=143, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=142, solved=0))] (%8373:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=136)]) -> (%8376:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=143)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=139, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=139, solved=0), )] (%8374:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=139)]) -> (%8374:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=139)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=139, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=139, solved=0), )] (%8374:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=139)]) -> (%8377:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=139)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=141, solved=0), 
outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=141, solved=0), )] (%8375:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=141)]) -> (%8375:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=141)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=141, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=141, solved=0), )] (%8375:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=141)]) -> (%8378:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=141)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=143, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=143, solved=0), )] (%8376:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=143)]) -> (%8376:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=143)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=143, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=143, solved=0), )] (%8376:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=143)]) -> (%8379:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=143)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=139, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=144, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=145, solved=0))] (%8377:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=139)]) -> (%8380:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, 
quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=144)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=141, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=146, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=147, solved=0))] (%8378:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=141)]) -> (%8381:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=146)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), )] (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, 
scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), )] (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=144, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=144, solved=0), )] (%8380:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=144)]) -> (%8380:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=144)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=144, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=144, solved=0), )] (%8380:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=144)]) -> 
(%8380:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=144)]) + linalg.CPU.NegOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=144, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=144, solved=0), )] (%8380:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=144)]) -> (%8382:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=144)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=144, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=144, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=144, solved=0), )] (%8382:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=144)], %8380:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: 
Int32), uuid=144)]) -> (%8383:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=144)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=144, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=144, solved=0), )] (%8383:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=144)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%8384:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=144)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=144, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=144, solved=0), )] (%8380:tensor<[1, 16, 32, 128], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=144)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8385:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=144)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=144, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=144, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=144, solved=0), )] (%8385:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=144)], %8384:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=144)]) -> (%8386:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=144)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=146, solved=0), 
outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=146, solved=0), )] (%8381:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=146)]) -> (%8381:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=146)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=146, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=146, solved=0), )] (%8381:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=146)]) -> (%8381:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=146)]) + linalg.CPU.NegOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=146, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=146, solved=0), )] (%8381:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: 
Int32), uuid=146)]) -> (%8387:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=146)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=146, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=146, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=146, solved=0), )] (%8387:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=146)], %8381:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=146)]) -> (%8388:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=146)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=146, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=146, solved=0), )] (%8388:tensor<[1, 8, 32, 128], 
UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=146)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%8389:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=146)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=146, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=146, solved=0), )] (%8381:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=146)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8390:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=146)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=146, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, 
quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=146, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=146, solved=0), )] (%8390:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=146)], %8389:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=146)]) -> (%8391:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=146)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=146, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=148, solved=0), )] (%8391:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=146)]) -> (%8392:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=148)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=148, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=149, solved=0), )] (%8392:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=148)]) -> (%8393:tensor<[1, 8, 32, 128], UInt8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=149)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=149, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=149, solved=0), )] (%8393:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=149)]) -> (%8395:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=149)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=143, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=150, solved=0), )] (%8379:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=143)]) -> (%8396:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=150)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=150, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=151, solved=0), )] (%8396:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=150)]) -> (%8397:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, 
quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=151)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=5, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=149, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=5, solved=0), )] (%8212:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=5)], %8395:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=149)]) -> (%8399:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=5)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=33, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=151, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=33, solved=0), )] (%8213:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=33)], %8397:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, 
quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=151)]) -> (%8400:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=33)]) + linalg.CPU.RepeatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=5, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=5, solved=0), )] (%8399:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=5)]) -> (%8401:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=5)]) + linalg.CPU.RepeatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=33, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=33, solved=0), )] (%8400:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=33)]) -> (%8402:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=33)]) + linalg.CPU.MatMulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=144, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, 
quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=5, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=152, solved=0), )] (%8386:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=144)], %8401:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=5)]) -> (%8403:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=152)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=152, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=153, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=152, solved=0), )] (%8403:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=152)], %8404:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=153), constant:[0.088388346]]) -> (%8405:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: 
Int32), uuid=152)]) + linalg.CPU.ReduceMinOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=152, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=154, solved=0), )] (%8405:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=152)]) -> (%8406:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=154)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=154, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=155, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=154, solved=0), )] (%8406:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=154)], %8407:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=155), constant:[-20]]) -> (%8408:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=154)]) + linalg.CPU.EqualOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2, solved=0), inputs_1:QuantSpec(Raw(type: UInt16), uuid=156, solved=0), outputs_0:QuantSpec(Raw(type: UInt8), uuid=157, solved=0), )] (%8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8409:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=156), constant:[0]]) -> (%8410:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=157)]) + linalg.CPU.WhereOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=157, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=152, solved=0), inputs_2:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=154, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=154, solved=0), )] (%8410:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=157)], %8405:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=152)], %8408:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=154)]) -> (%8411:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=154)]) + 
linalg.CPU.SoftmaxOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=154, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=158, solved=0), )] (%8411:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=154)]) -> (%8412:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=158)]) + linalg.CPU.MatMulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=158, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=33, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=159, solved=0), )] (%8412:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=158)], %8402:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=33)]) -> (%8413:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=159)]) + linalg.CPU.TransposeOp 
[qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=159, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=159, solved=0), )] (%8413:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=159)]) -> (%8414:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=159)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=159, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=159, solved=0), )] (%8414:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=159)]) -> (%8414:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=159)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=159, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, 
scale_type: Float32, zero_point_type: Int32), uuid=161, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=160, solved=0))] (%8414:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=159)]) -> (%8415:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=161)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=161, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%8372:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8415:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=161)]) -> (%8416:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 
65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=162, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=163, solved=0))] (%8416:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%8417:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=162)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=162, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=165, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=164, solved=0))] (%8417:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=162)]) -> (%8418:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=165)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, 
quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=162, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=167, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=166, solved=0))] (%8417:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=162)]) -> (%8419:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=167)]) + linalg.CPU.SigmoidOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=167, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=168, solved=0), )] (%8419:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=167)]) -> (%8420:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=168)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=167, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, 
quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=168, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=167, solved=0), )] (%8419:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=167)], %8420:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=168)]) -> (%8421:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=167)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=167, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=165, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=167, solved=0), )] (%8421:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=167)], %8418:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=165)]) -> (%8422:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=167)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=167, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=170, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=169, solved=0))] (%8422:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=167)]) -> (%8423:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=170)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=170, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%8416:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8423:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: 
Int32), uuid=170)]) -> (%8424:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=171, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=172, solved=0))] (%8424:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%8425:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=171)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=171, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=174, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=173, solved=0))] (%8425:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), 
uuid=171)]) -> (%8426:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=174)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=171, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=176, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=175, solved=0))] (%8425:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=171)]) -> (%8427:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=176)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=171, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=178, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=177, solved=0))] (%8425:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=171)]) -> (%8428:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=178)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=174, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=174, solved=0), )] (%8426:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=174)]) -> (%8426:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=174)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=174, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=174, solved=0), )] (%8426:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=174)]) -> (%8429:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=174)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, 
quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=176, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=176, solved=0), )] (%8427:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=176)]) -> (%8427:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=176)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=176, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=176, solved=0), )] (%8427:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=176)]) -> (%8430:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=176)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=178, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=178, 
solved=0), )] (%8428:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=178)]) -> (%8428:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=178)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=178, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=178, solved=0), )] (%8428:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=178)]) -> (%8431:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=178)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=174, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=179, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=180, solved=0))] (%8429:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: 
Float32, zero_point_type: Int32), uuid=174)]) -> (%8432:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=179)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=176, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=181, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=182, solved=0))] (%8430:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=176)]) -> (%8433:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=181)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), )] (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) + linalg.CPU.ViewOp 
[qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), )] (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=179, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=179, solved=0), )] (%8432:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=179)]) -> (%8432:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=179)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=179, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=179, solved=0), )] (%8432:tensor<[1, 16, 32, 128], 
UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=179)]) -> (%8432:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=179)]) + linalg.CPU.NegOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=179, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=179, solved=0), )] (%8432:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=179)]) -> (%8434:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=179)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=179, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=179, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=179, solved=0), )] (%8434:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=179)], 
%8432:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=179)]) -> (%8435:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=179)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=179, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=179, solved=0), )] (%8435:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=179)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%8436:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=179)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=179, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, 
quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=179, solved=0), )] (%8432:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=179)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8437:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=179)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=179, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=179, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=179, solved=0), )] (%8437:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=179)], %8436:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=179)]) -> (%8438:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=179)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, 
quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=181, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=181, solved=0), )] (%8433:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=181)]) -> (%8433:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=181)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=181, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=181, solved=0), )] (%8433:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=181)]) -> (%8433:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=181)]) + linalg.CPU.NegOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=181, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=181, solved=0), 
)] (%8433:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=181)]) -> (%8439:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=181)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=181, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=181, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=181, solved=0), )] (%8439:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=181)], %8433:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=181)]) -> (%8440:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=181)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=181, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), 
outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=181, solved=0), )] (%8440:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=181)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%8441:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=181)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=181, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=181, solved=0), )] (%8433:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=181)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8442:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=181)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, 
quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=181, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=181, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=181, solved=0), )] (%8442:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=181)], %8441:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=181)]) -> (%8443:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=181)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=181, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=183, solved=0), )] (%8443:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=181)]) -> (%8444:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=183)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=183, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, 
scale_type: Float32), uuid=184, solved=0), )] (%8444:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=183)]) -> (%8445:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=184)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=184, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=184, solved=0), )] (%8445:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=184)]) -> (%8447:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=184)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=178, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=185, solved=0), )] (%8431:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=178)]) -> (%8448:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=185)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=185, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=186, solved=0), 
)] (%8448:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=185)]) -> (%8449:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=186)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=6, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=184, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=6, solved=0), )] (%8214:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=6)], %8447:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=184)]) -> (%8451:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=6)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=34, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=186, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=34, solved=0), )] (%8215:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 
255, quant_to_type: UInt8, scale_type: Float32), uuid=34)], %8449:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=186)]) -> (%8452:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=34)]) + linalg.CPU.RepeatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=6, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=6, solved=0), )] (%8451:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=6)]) -> (%8453:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=6)]) + linalg.CPU.RepeatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=34, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=34, solved=0), )] (%8452:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=34)]) -> (%8454:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=34)]) + linalg.CPU.MatMulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, 
quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=179, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=6, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=187, solved=0), )] (%8438:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=179)], %8453:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=6)]) -> (%8455:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=187)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=187, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=188, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=187, solved=0), )] (%8455:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=187)], %8456:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: 
Int32), uuid=188), constant:[0.088388346]]) -> (%8457:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=187)]) + linalg.CPU.ReduceMinOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=187, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=189, solved=0), )] (%8457:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=187)]) -> (%8458:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=189)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=189, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=190, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=189, solved=0), )] (%8458:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=189)], %8459:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: 
Float32, zero_point_type: Int32), uuid=190), constant:[-20]]) -> (%8460:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=189)]) + linalg.CPU.EqualOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2, solved=0), inputs_1:QuantSpec(Raw(type: UInt16), uuid=191, solved=0), outputs_0:QuantSpec(Raw(type: UInt8), uuid=192, solved=0), )] (%8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8461:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=191), constant:[0]]) -> (%8462:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=192)]) + linalg.CPU.WhereOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=192, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=187, solved=0), inputs_2:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=189, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=189, solved=0), )] (%8462:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=192)], %8457:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=187)], %8460:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), 
uuid=189)]) -> (%8463:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=189)]) + linalg.CPU.SoftmaxOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=189, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=193, solved=0), )] (%8463:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=189)]) -> (%8464:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=193)]) + linalg.CPU.MatMulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=193, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=34, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=194, solved=0), )] (%8464:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=193)], %8454:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=34)]) -> 
(%8465:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=194)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=194, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=194, solved=0), )] (%8465:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=194)]) -> (%8466:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=194)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=194, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=194, solved=0), )] (%8466:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=194)]) -> (%8466:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=194)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, 
quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=194, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=196, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=195, solved=0))] (%8466:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=194)]) -> (%8467:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=196)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=196, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%8424:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8467:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=196)]) -> (%8468:tensor<[1, 32, 2048], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=197, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=198, solved=0))] (%8468:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%8469:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=197)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=197, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=200, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=199, solved=0))] (%8469:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=197)]) -> (%8470:tensor<[1, 32, 6144], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=200)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=197, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=202, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=201, solved=0))] (%8469:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=197)]) -> (%8471:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=202)]) + linalg.CPU.SigmoidOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=202, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=203, solved=0), )] (%8471:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=202)]) -> (%8472:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=203)]) + 
linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=202, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=203, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=202, solved=0), )] (%8471:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=202)], %8472:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=203)]) -> (%8473:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=202)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=202, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=200, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=202, solved=0), )] (%8473:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=202)], %8470:tensor<[1, 32, 6144], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=200)]) -> (%8474:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=202)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=202, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=205, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=204, solved=0))] (%8474:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=202)]) -> (%8475:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=205)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=205, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%8468:tensor<[1, 32, 2048], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8475:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=205)]) -> (%8476:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=206, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=207, solved=0))] (%8476:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%8477:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=206)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=206, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=209, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: 
-8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=208, solved=0))] (%8477:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=206)]) -> (%8478:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=209)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=206, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=211, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=210, solved=0))] (%8477:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=206)]) -> (%8479:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=211)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=206, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=213, solved=0), 
weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=212, solved=0))] (%8477:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=206)]) -> (%8480:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=213)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=209, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=209, solved=0), )] (%8478:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=209)]) -> (%8478:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=209)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=209, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=209, solved=0), )] (%8478:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: 
Float32, zero_point_type: Int32), uuid=209)]) -> (%8481:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=209)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=211, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=211, solved=0), )] (%8479:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=211)]) -> (%8479:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=211)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=211, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=211, solved=0), )] (%8479:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=211)]) -> (%8482:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=211)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, 
quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=213, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=213, solved=0), )] (%8480:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=213)]) -> (%8480:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=213)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=213, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=213, solved=0), )] (%8480:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=213)]) -> (%8483:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=213)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=209, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=214, 
solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=215, solved=0))] (%8481:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=209)]) -> (%8484:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=214)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=211, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=216, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=217, solved=0))] (%8482:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=211)]) -> (%8485:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=216)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), )] (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), )] (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=214, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=214, solved=0), )] (%8484:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=214)]) -> (%8484:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=214)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, 
quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=214, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=214, solved=0), )] (%8484:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=214)]) -> (%8484:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=214)]) + linalg.CPU.NegOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=214, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=214, solved=0), )] (%8484:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=214)]) -> (%8486:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=214)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=214, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=214, 
solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=214, solved=0), )] (%8486:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=214)], %8484:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=214)]) -> (%8487:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=214)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=214, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=214, solved=0), )] (%8487:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=214)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%8488:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=214)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, 
quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=214, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=214, solved=0), )] (%8484:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=214)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8489:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=214)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=214, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=214, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=214, solved=0), )] (%8489:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=214)], %8488:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: 
Float32, zero_point_type: Int32), uuid=214)]) -> (%8490:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=214)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=216, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=216, solved=0), )] (%8485:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=216)]) -> (%8485:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=216)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=216, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=216, solved=0), )] (%8485:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=216)]) -> (%8485:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=216)]) + linalg.CPU.NegOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, 
quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=216, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=216, solved=0), )] (%8485:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=216)]) -> (%8491:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=216)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=216, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=216, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=216, solved=0), )] (%8491:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=216)], %8485:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=216)]) -> (%8492:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=216)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, 
qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=216, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=216, solved=0), )] (%8492:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=216)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%8493:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=216)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=216, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=216, solved=0), )] (%8485:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=216)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: 
Float32), uuid=65)]) -> (%8494:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=216)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=216, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=216, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=216, solved=0), )] (%8494:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=216)], %8493:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=216)]) -> (%8495:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=216)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=216, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=218, solved=0), )] (%8495:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=216)]) -> (%8496:tensor<[1, 8, 32, 128], Float16, 
CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=218)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=218, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=219, solved=0), )] (%8496:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=218)]) -> (%8497:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=219)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=219, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=219, solved=0), )] (%8497:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=219)]) -> (%8499:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=219)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=213, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=220, solved=0), )] (%8483:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=213)]) -> (%8500:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: 
Float16), uuid=220)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=220, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=221, solved=0), )] (%8500:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=220)]) -> (%8501:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=221)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=7, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=219, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=7, solved=0), )] (%8216:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=7)], %8499:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=219)]) -> (%8503:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=7)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=35, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: 
UInt8, scale_type: Float32), uuid=221, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=35, solved=0), )] (%8217:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=35)], %8501:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=221)]) -> (%8504:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=35)]) + linalg.CPU.RepeatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=7, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=7, solved=0), )] (%8503:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=7)]) -> (%8505:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=7)]) + linalg.CPU.RepeatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=35, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=35, solved=0), )] (%8504:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), 
uuid=35)]) -> (%8506:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=35)]) + linalg.CPU.MatMulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=214, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=7, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=222, solved=0), )] (%8490:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=214)], %8505:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=7)]) -> (%8507:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=222)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=222, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=223, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=222, solved=0), )] (%8507:tensor<[1, 16, 32, 1024], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=222)], %8508:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=223), constant:[0.088388346]]) -> (%8509:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=222)]) + linalg.CPU.ReduceMinOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=222, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=224, solved=0), )] (%8509:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=222)]) -> (%8510:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=224)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=224, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=225, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=224, solved=0), )] (%8510:tensor<[1, 16, 
32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=224)], %8511:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=225), constant:[-20]]) -> (%8512:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=224)]) + linalg.CPU.EqualOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2, solved=0), inputs_1:QuantSpec(Raw(type: UInt16), uuid=226, solved=0), outputs_0:QuantSpec(Raw(type: UInt8), uuid=227, solved=0), )] (%8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8513:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=226), constant:[0]]) -> (%8514:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=227)]) + linalg.CPU.WhereOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=227, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=222, solved=0), inputs_2:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=224, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=224, solved=0), )] (%8514:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=227)], %8509:tensor<[1, 16, 32, 1024], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=222)], %8512:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=224)]) -> (%8515:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=224)]) + linalg.CPU.SoftmaxOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=224, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=228, solved=0), )] (%8515:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=224)]) -> (%8516:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=228)]) + linalg.CPU.MatMulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=228, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=35, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=229, solved=0), )] (%8516:tensor<[1, 16, 32, 1024], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=228)], %8506:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=35)]) -> (%8517:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=229)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=229, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=229, solved=0), )] (%8517:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=229)]) -> (%8518:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=229)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=229, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=229, solved=0), )] (%8518:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=229)]) -> 
(%8518:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=229)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=229, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=231, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=230, solved=0))] (%8518:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=229)]) -> (%8519:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=231)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=231, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%8476:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8519:tensor<[1, 32, 
2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=231)]) -> (%8520:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=232, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=233, solved=0))] (%8520:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%8521:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=232)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=232, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=235, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=234, solved=0))] (%8521:tensor<[1, 32, 2048], 
UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=232)]) -> (%8522:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=235)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=232, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=237, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=236, solved=0))] (%8521:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=232)]) -> (%8523:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=237)]) + linalg.CPU.SigmoidOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=237, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=238, solved=0), )] (%8523:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), 
uuid=237)]) -> (%8524:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=238)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=237, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=238, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=237, solved=0), )] (%8523:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=237)], %8524:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=238)]) -> (%8525:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=237)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=237, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=235, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=237, solved=0), )] (%8525:tensor<[1, 32, 6144], 
UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=237)], %8522:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=235)]) -> (%8526:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=237)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=237, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=240, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=239, solved=0))] (%8526:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=237)]) -> (%8527:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=240)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=240, solved=0), 
outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%8520:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8527:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=240)]) -> (%8528:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=241, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=242, solved=0))] (%8528:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%8529:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=241)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=241, 
solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=244, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=243, solved=0))] (%8529:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=241)]) -> (%8530:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=244)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=241, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=246, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=245, solved=0))] (%8529:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=241)]) -> (%8531:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=246)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: 
Int32), uuid=241, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=248, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=247, solved=0))] (%8529:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=241)]) -> (%8532:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=248)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=244, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=244, solved=0), )] (%8530:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=244)]) -> (%8530:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=244)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=244, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: 
Int32), uuid=244, solved=0), )] (%8530:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=244)]) -> (%8533:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=244)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=246, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=246, solved=0), )] (%8531:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=246)]) -> (%8531:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=246)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=246, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=246, solved=0), )] (%8531:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=246)]) -> (%8534:tensor<[1, 8, 32, 128], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=246)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=248, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=248, solved=0), )] (%8532:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=248)]) -> (%8532:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=248)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=248, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=248, solved=0), )] (%8532:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=248)]) -> (%8535:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=248)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, 
quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=244, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=249, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=250, solved=0))] (%8533:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=244)]) -> (%8536:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=249)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=246, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=251, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=252, solved=0))] (%8534:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=246)]) -> (%8537:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=251)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: 
UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), )] (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), )] (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=249, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=249, solved=0), )] (%8536:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=249)]) -> (%8536:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, 
quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=249)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=249, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=249, solved=0), )] (%8536:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=249)]) -> (%8536:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=249)]) + linalg.CPU.NegOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=249, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=249, solved=0), )] (%8536:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=249)]) -> (%8538:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=249)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: 
Int32), uuid=249, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=249, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=249, solved=0), )] (%8538:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=249)], %8536:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=249)]) -> (%8539:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=249)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=249, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=249, solved=0), )] (%8539:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=249)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%8540:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, 
quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=249)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=249, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=249, solved=0), )] (%8536:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=249)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8541:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=249)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=249, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=249, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=249, solved=0), )] (%8541:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), 
uuid=249)], %8540:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=249)]) -> (%8542:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=249)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=251, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=251, solved=0), )] (%8537:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=251)]) -> (%8537:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=251)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=251, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=251, solved=0), )] (%8537:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=251)]) -> (%8537:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 
65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=251)]) + linalg.CPU.NegOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=251, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=251, solved=0), )] (%8537:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=251)]) -> (%8543:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=251)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=251, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=251, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=251, solved=0), )] (%8543:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=251)], %8537:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=251)]) -> (%8544:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 
0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=251)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=251, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=251, solved=0), )] (%8544:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=251)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%8545:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=251)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=251, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=251, solved=0), )] (%8537:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=251)], 
%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8546:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=251)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=251, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=251, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=251, solved=0), )] (%8546:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=251)], %8545:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=251)]) -> (%8547:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=251)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=251, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=253, solved=0), )] (%8547:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, 
quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=251)]) -> (%8548:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=253)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=253, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=254, solved=0), )] (%8548:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=253)]) -> (%8549:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=254)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=254, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=254, solved=0), )] (%8549:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=254)]) -> (%8551:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=254)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=248, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=255, solved=0), )] (%8535:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, 
scale_type: Float32, zero_point_type: Int32), uuid=248)]) -> (%8552:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=255)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=255, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=256, solved=0), )] (%8552:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=255)]) -> (%8553:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=256)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=8, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=254, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=8, solved=0), )] (%8218:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=8)], %8551:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=254)]) -> (%8555:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=8)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, 
quant_to_type: UInt8, scale_type: Float32), uuid=36, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=256, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=36, solved=0), )] (%8219:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=36)], %8553:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=256)]) -> (%8556:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=36)]) + linalg.CPU.RepeatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=8, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=8, solved=0), )] (%8555:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=8)]) -> (%8557:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=8)]) + linalg.CPU.RepeatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=36, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=36, solved=0), )] (%8556:tensor<[1, 8, 
1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=36)]) -> (%8558:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=36)]) + linalg.CPU.MatMulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=249, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=8, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=257, solved=0), )] (%8542:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=249)], %8557:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=8)]) -> (%8559:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=257)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=257, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=258, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=257, solved=0), )] (%8559:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=257)], %8560:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=258), constant:[0.088388346]]) -> (%8561:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=257)]) + linalg.CPU.ReduceMinOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=257, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=259, solved=0), )] (%8561:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=257)]) -> (%8562:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=259)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=259, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=260, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: 
UInt16, scale_type: Float32, zero_point_type: Int32), uuid=259, solved=0), )] (%8562:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=259)], %8563:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=260), constant:[-20]]) -> (%8564:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=259)]) + linalg.CPU.EqualOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2, solved=0), inputs_1:QuantSpec(Raw(type: UInt16), uuid=261, solved=0), outputs_0:QuantSpec(Raw(type: UInt8), uuid=262, solved=0), )] (%8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8565:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=261), constant:[0]]) -> (%8566:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=262)]) + linalg.CPU.WhereOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=262, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=257, solved=0), inputs_2:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=259, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=259, solved=0), )] (%8566:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: 
UInt8), uuid=262)], %8561:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=257)], %8564:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=259)]) -> (%8567:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=259)]) + linalg.CPU.SoftmaxOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=259, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=263, solved=0), )] (%8567:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=259)]) -> (%8568:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=263)]) + linalg.CPU.MatMulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=263, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=36, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), 
uuid=264, solved=0), )] (%8568:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=263)], %8558:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=36)]) -> (%8569:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=264)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=264, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=264, solved=0), )] (%8569:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=264)]) -> (%8570:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=264)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=264, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=264, solved=0), )] (%8570:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, 
quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=264)]) -> (%8570:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=264)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=264, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=266, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=265, solved=0))] (%8570:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=264)]) -> (%8571:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=266)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=266, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%8528:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: 
UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8571:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=266)]) -> (%8572:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=267, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=268, solved=0))] (%8572:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%8573:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=267)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=267, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=270, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: 
UInt4, scale_1_type: Float32), uuid=269, solved=0))] (%8573:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=267)]) -> (%8574:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=270)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=267, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=272, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=271, solved=0))] (%8573:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=267)]) -> (%8575:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=272)]) + linalg.CPU.SigmoidOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=272, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=273, solved=0), )] (%8575:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 
65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=272)]) -> (%8576:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=273)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=272, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=273, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=272, solved=0), )] (%8575:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=272)], %8576:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=273)]) -> (%8577:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=272)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=272, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=270, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=272, solved=0), )] (%8577:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=272)], %8574:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=270)]) -> (%8578:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=272)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=272, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=275, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=274, solved=0))] (%8578:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=272)]) -> (%8579:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=275)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: 
UInt16, scale_type: Float32, zero_point_type: Int32), uuid=275, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%8572:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8579:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=275)]) -> (%8580:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=276, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=277, solved=0))] (%8580:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%8581:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=276)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, 
quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=276, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=279, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=278, solved=0))] (%8581:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=276)]) -> (%8582:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=279)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=276, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=281, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=280, solved=0))] (%8581:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=276)]) -> (%8583:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=281)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, 
quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=276, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=283, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=282, solved=0))] (%8581:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=276)]) -> (%8584:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=283)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=279, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=279, solved=0), )] (%8582:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=279)]) -> (%8582:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=279)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=279, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, 
quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=279, solved=0), )] (%8582:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=279)]) -> (%8585:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=279)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=281, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=281, solved=0), )] (%8583:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=281)]) -> (%8583:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=281)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=281, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=281, solved=0), )] (%8583:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=281)]) -> (%8586:tensor<[1, 
8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=281)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=283, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=283, solved=0), )] (%8584:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=283)]) -> (%8584:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=283)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=283, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=283, solved=0), )] (%8584:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=283)]) -> (%8587:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=283)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 
0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=279, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=284, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=285, solved=0))] (%8585:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=279)]) -> (%8588:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=284)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=281, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=286, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=287, solved=0))] (%8586:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=281)]) -> (%8589:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=286)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, 
quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), )] (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), )] (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=284, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=284, solved=0), )] (%8588:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=284)]) -> (%8588:tensor<[1, 16, 32, 128], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=284)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=284, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=284, solved=0), )] (%8588:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=284)]) -> (%8588:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=284)]) + linalg.CPU.NegOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=284, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=284, solved=0), )] (%8588:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=284)]) -> (%8590:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=284)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, 
quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=284, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=284, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=284, solved=0), )] (%8590:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=284)], %8588:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=284)]) -> (%8591:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=284)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=284, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=284, solved=0), )] (%8591:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=284)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%8592:tensor<[1, 16, 32, 128], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=284)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=284, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=284, solved=0), )] (%8588:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=284)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8593:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=284)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=284, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=284, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=284, solved=0), )] (%8593:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, 
quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=284)], %8592:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=284)]) -> (%8594:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=284)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=286, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=286, solved=0), )] (%8589:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=286)]) -> (%8589:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=286)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=286, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=286, solved=0), )] (%8589:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=286)]) -> (%8589:tensor<[1, 8, 32, 128], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=286)]) + linalg.CPU.NegOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=286, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=286, solved=0), )] (%8589:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=286)]) -> (%8595:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=286)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=286, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=286, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=286, solved=0), )] (%8595:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=286)], %8589:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=286)]) -> (%8596:tensor<[1, 8, 32, 128], 
UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=286)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=286, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=286, solved=0), )] (%8596:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=286)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%8597:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=286)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=286, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=286, solved=0), )] (%8589:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, 
scale_type: Float32, zero_point_type: Int32), uuid=286)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8598:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=286)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=286, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=286, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=286, solved=0), )] (%8598:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=286)], %8597:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=286)]) -> (%8599:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=286)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=286, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=288, solved=0), )] (%8599:tensor<[1, 8, 32, 128], 
UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=286)]) -> (%8600:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=288)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=288, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=289, solved=0), )] (%8600:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=288)]) -> (%8601:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=289)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=289, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=289, solved=0), )] (%8601:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=289)]) -> (%8603:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=289)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=283, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=290, solved=0), )] (%8587:tensor<[1, 8, 32, 128], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=283)]) -> (%8604:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=290)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=290, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=291, solved=0), )] (%8604:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=290)]) -> (%8605:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=291)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=9, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=289, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=9, solved=0), )] (%8220:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=9)], %8603:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=289)]) -> (%8607:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=9)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, 
qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=37, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=291, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=37, solved=0), )] (%8221:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=37)], %8605:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=291)]) -> (%8608:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=37)]) + linalg.CPU.RepeatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=9, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=9, solved=0), )] (%8607:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=9)]) -> (%8609:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=9)]) + linalg.CPU.RepeatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=37, solved=0), 
outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=37, solved=0), )] (%8608:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=37)]) -> (%8610:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=37)]) + linalg.CPU.MatMulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=284, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=9, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=292, solved=0), )] (%8594:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=284)], %8609:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=9)]) -> (%8611:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=292)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=292, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=293, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=292, solved=0), )] (%8611:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=292)], %8612:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=293), constant:[0.088388346]]) -> (%8613:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=292)]) + linalg.CPU.ReduceMinOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=292, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=294, solved=0), )] (%8613:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=292)]) -> (%8614:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=294)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=294, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: 
UInt16, scale_type: Float32, zero_point_type: Int32), uuid=295, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=294, solved=0), )] (%8614:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=294)], %8615:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=295), constant:[-20]]) -> (%8616:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=294)]) + linalg.CPU.EqualOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2, solved=0), inputs_1:QuantSpec(Raw(type: UInt16), uuid=296, solved=0), outputs_0:QuantSpec(Raw(type: UInt8), uuid=297, solved=0), )] (%8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8617:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=296), constant:[0]]) -> (%8618:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=297)]) + linalg.CPU.WhereOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=297, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=292, solved=0), inputs_2:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=294, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: 
UInt16, scale_type: Float32, zero_point_type: Int32), uuid=294, solved=0), )] (%8618:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=297)], %8613:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=292)], %8616:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=294)]) -> (%8619:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=294)]) + linalg.CPU.SoftmaxOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=294, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=298, solved=0), )] (%8619:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=294)]) -> (%8620:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=298)]) + linalg.CPU.MatMulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=298, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), 
uuid=37, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=299, solved=0), )] (%8620:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=298)], %8610:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=37)]) -> (%8621:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=299)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=299, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=299, solved=0), )] (%8621:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=299)]) -> (%8622:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=299)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=299, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), 
uuid=299, solved=0), )] (%8622:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=299)]) -> (%8622:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=299)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=299, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=301, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=300, solved=0))] (%8622:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=299)]) -> (%8623:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=301)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=301, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, 
solved=0), )] (%8580:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8623:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=301)]) -> (%8624:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=302, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=303, solved=0))] (%8624:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%8625:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=302)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=302, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), 
uuid=305, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=304, solved=0))] (%8625:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=302)]) -> (%8626:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=305)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=302, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=307, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=306, solved=0))] (%8625:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=302)]) -> (%8627:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=307)]) + linalg.CPU.SigmoidOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=307, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=308, solved=0), )] (%8627:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=307)]) -> (%8628:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=308)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=307, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=308, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=307, solved=0), )] (%8627:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=307)], %8628:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=308)]) -> (%8629:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=307)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=307, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: 
Float32, zero_point_type: Int32), uuid=305, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=307, solved=0), )] (%8629:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=307)], %8626:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=305)]) -> (%8630:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=307)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=307, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=310, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=309, solved=0))] (%8630:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=307)]) -> (%8631:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=310)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, 
quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=310, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%8624:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8631:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=310)]) -> (%8632:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=311, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=312, solved=0))] (%8632:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%8633:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=311)]) + 
linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=311, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=314, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=313, solved=0))] (%8633:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=311)]) -> (%8634:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=314)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=311, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=316, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=315, solved=0))] (%8633:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=311)]) -> (%8635:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), 
uuid=316)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=311, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=318, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=317, solved=0))] (%8633:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=311)]) -> (%8636:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=318)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=314, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=314, solved=0), )] (%8634:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=314)]) -> (%8634:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=314)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, 
quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=314, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=314, solved=0), )] (%8634:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=314)]) -> (%8637:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=314)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=316, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=316, solved=0), )] (%8635:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=316)]) -> (%8635:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=316)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=316, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=316, 
solved=0), )] (%8635:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=316)]) -> (%8638:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=316)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=318, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=318, solved=0), )] (%8636:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=318)]) -> (%8636:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=318)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=318, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=318, solved=0), )] (%8636:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=318)]) -> (%8639:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 
65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=318)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=314, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=319, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=320, solved=0))] (%8637:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=314)]) -> (%8640:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=319)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=316, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=321, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=322, solved=0))] (%8638:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=316)]) -> (%8641:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, 
quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=321)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), )] (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), )] (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=319, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=319, solved=0), )] (%8640:tensor<[1, 16, 32, 128], 
UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=319)]) -> (%8640:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=319)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=319, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=319, solved=0), )] (%8640:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=319)]) -> (%8640:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=319)]) + linalg.CPU.NegOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=319, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=319, solved=0), )] (%8640:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=319)]) -> (%8642:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: 
Float32, zero_point_type: Int32), uuid=319)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=319, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=319, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=319, solved=0), )] (%8642:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=319)], %8640:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=319)]) -> (%8643:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=319)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=319, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=319, solved=0), )] (%8643:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=319)], 
%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%8644:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=319)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=319, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=319, solved=0), )] (%8640:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=319)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8645:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=319)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=319, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=319, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, 
quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=319, solved=0), )] (%8645:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=319)], %8644:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=319)]) -> (%8646:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=319)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=321, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=321, solved=0), )] (%8641:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=321)]) -> (%8641:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=321)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=321, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=321, solved=0), )] (%8641:tensor<[1, 8, 32, 128], 
UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=321)]) -> (%8641:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=321)]) + linalg.CPU.NegOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=321, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=321, solved=0), )] (%8641:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=321)]) -> (%8647:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=321)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=321, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=321, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=321, solved=0), )] (%8647:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=321)], 
%8641:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=321)]) -> (%8648:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=321)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=321, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=321, solved=0), )] (%8648:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=321)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%8649:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=321)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=321, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, 
quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=321, solved=0), )] (%8641:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=321)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8650:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=321)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=321, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=321, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=321, solved=0), )] (%8650:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=321)], %8649:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=321)]) -> (%8651:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=321)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, 
quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=321, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=323, solved=0), )] (%8651:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=321)]) -> (%8652:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=323)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=323, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=324, solved=0), )] (%8652:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=323)]) -> (%8653:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=324)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=324, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=324, solved=0), )] (%8653:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=324)]) -> (%8655:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=324)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, 
quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=318, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=325, solved=0), )] (%8639:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=318)]) -> (%8656:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=325)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=325, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=326, solved=0), )] (%8656:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=325)]) -> (%8657:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=326)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=10, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=324, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=10, solved=0), )] (%8222:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=10)], %8655:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, 
quant_to_type: UInt8, scale_type: Float32), uuid=324)]) -> (%8659:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=10)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=38, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=326, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=38, solved=0), )] (%8223:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=38)], %8657:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=326)]) -> (%8660:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=38)]) + linalg.CPU.RepeatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=10, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=10, solved=0), )] (%8659:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=10)]) -> (%8661:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, 
scale_type: Float32), uuid=10)]) + linalg.CPU.RepeatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=38, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=38, solved=0), )] (%8660:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=38)]) -> (%8662:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=38)]) + linalg.CPU.MatMulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=319, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=10, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=327, solved=0), )] (%8646:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=319)], %8661:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=10)]) -> (%8663:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=327)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, 
quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=327, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=328, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=327, solved=0), )] (%8663:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=327)], %8664:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=328), constant:[0.088388346]]) -> (%8665:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=327)]) + linalg.CPU.ReduceMinOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=327, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=329, solved=0), )] (%8665:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=327)]) -> (%8666:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=329)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, 
qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=329, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=330, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=329, solved=0), )] (%8666:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=329)], %8667:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=330), constant:[-20]]) -> (%8668:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=329)]) + linalg.CPU.EqualOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2, solved=0), inputs_1:QuantSpec(Raw(type: UInt16), uuid=331, solved=0), outputs_0:QuantSpec(Raw(type: UInt8), uuid=332, solved=0), )] (%8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8669:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=331), constant:[0]]) -> (%8670:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=332)]) + linalg.CPU.WhereOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=332, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: 
Float32, zero_point_type: Int32), uuid=327, solved=0), inputs_2:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=329, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=329, solved=0), )] (%8670:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=332)], %8665:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=327)], %8668:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=329)]) -> (%8671:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=329)]) + linalg.CPU.SoftmaxOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=329, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=333, solved=0), )] (%8671:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=329)]) -> (%8672:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=333)]) + linalg.CPU.MatMulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, 
quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=333, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=38, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=334, solved=0), )] (%8672:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=333)], %8662:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=38)]) -> (%8673:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=334)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=334, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=334, solved=0), )] (%8673:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=334)]) -> (%8674:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=334)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, 
quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=334, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=334, solved=0), )] (%8674:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=334)]) -> (%8674:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=334)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=334, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=336, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=335, solved=0))] (%8674:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=334)]) -> (%8675:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=336)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), 
uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=336, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%8632:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8675:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=336)]) -> (%8676:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=337, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=338, solved=0))] (%8676:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%8677:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=337)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, 
quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=337, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=340, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=339, solved=0))] (%8677:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=337)]) -> (%8678:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=340)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=337, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=342, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=341, solved=0))] (%8677:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=337)]) -> (%8679:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=342)]) + linalg.CPU.SigmoidOp [qnn_graph_name:model.0.s32, 
qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=342, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=343, solved=0), )] (%8679:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=342)]) -> (%8680:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=343)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=342, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=343, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=342, solved=0), )] (%8679:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=342)], %8680:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=343)]) -> (%8681:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=342)]) + linalg.CPU.MulOp 
[qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=342, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=340, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=342, solved=0), )] (%8681:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=342)], %8678:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=340)]) -> (%8682:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=342)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=342, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=345, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=344, solved=0))] (%8682:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=342)]) -> (%8683:tensor<[1, 32, 2048], 
UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=345)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=345, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%8676:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8683:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=345)]) -> (%8684:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=346, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=347, solved=0))] (%8684:tensor<[1, 32, 2048], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%8685:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=346)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=346, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=349, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=348, solved=0))] (%8685:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=346)]) -> (%8686:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=349)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=346, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=351, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=350, solved=0))] (%8685:tensor<[1, 32, 2048], 
UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=346)]) -> (%8687:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=351)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=346, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=353, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=352, solved=0))] (%8685:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=346)]) -> (%8688:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=353)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=349, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=349, solved=0), )] (%8686:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), 
uuid=349)]) -> (%8686:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=349)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=349, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=349, solved=0), )] (%8686:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=349)]) -> (%8689:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=349)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=351, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=351, solved=0), )] (%8687:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=351)]) -> (%8687:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=351)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, 
quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=351, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=351, solved=0), )] (%8687:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=351)]) -> (%8690:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=351)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=353, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=353, solved=0), )] (%8688:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=353)]) -> (%8688:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=353)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=353, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=353, 
solved=0), )] (%8688:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=353)]) -> (%8691:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=353)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=349, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=354, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=355, solved=0))] (%8689:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=349)]) -> (%8692:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=354)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=351, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=356, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=357, solved=0))] 
(%8690:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=351)]) -> (%8693:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=356)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), )] (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), )] (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, 
quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=354, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=354, solved=0), )] (%8692:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=354)]) -> (%8692:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=354)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=354, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=354, solved=0), )] (%8692:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=354)]) -> (%8692:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=354)]) + linalg.CPU.NegOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=354, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=354, 
solved=0), )] (%8692:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=354)]) -> (%8694:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=354)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=354, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=354, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=354, solved=0), )] (%8694:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=354)], %8692:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=354)]) -> (%8695:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=354)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=354, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, 
solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=354, solved=0), )] (%8695:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=354)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%8696:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=354)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=354, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=354, solved=0), )] (%8692:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=354)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8697:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=354)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, 
quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=354, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=354, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=354, solved=0), )] (%8697:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=354)], %8696:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=354)]) -> (%8698:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=354)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=356, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=356, solved=0), )] (%8693:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=356)]) -> (%8693:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=356)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, 
qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=356, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=356, solved=0), )] (%8693:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=356)]) -> (%8693:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=356)]) + linalg.CPU.NegOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=356, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=356, solved=0), )] (%8693:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=356)]) -> (%8699:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=356)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=356, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: 
Int32), uuid=356, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=356, solved=0), )] (%8699:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=356)], %8693:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=356)]) -> (%8700:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=356)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=356, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=356, solved=0), )] (%8700:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=356)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%8701:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=356)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, 
qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=356, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=356, solved=0), )] (%8693:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=356)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8702:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=356)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=356, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=356, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=356, solved=0), )] (%8702:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=356)], %8701:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: 
UInt16, scale_type: Float32, zero_point_type: Int32), uuid=356)]) -> (%8703:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=356)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=356, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=358, solved=0), )] (%8703:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=356)]) -> (%8704:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=358)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=358, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=359, solved=0), )] (%8704:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=358)]) -> (%8705:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=359)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=359, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=359, solved=0), )] (%8705:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, 
scale_type: Float32), uuid=359)]) -> (%8707:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=359)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=353, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=360, solved=0), )] (%8691:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=353)]) -> (%8708:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=360)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=360, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=361, solved=0), )] (%8708:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=360)]) -> (%8709:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=361)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=11, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=359, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=11, solved=0), )] (%8224:tensor<[1, 8, 128, 992], 
Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=11)], %8707:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=359)]) -> (%8711:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=11)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=39, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=361, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=39, solved=0), )] (%8225:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=39)], %8709:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=361)]) -> (%8712:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=39)]) + linalg.CPU.RepeatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=11, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=11, solved=0), )] (%8711:tensor<[1, 8, 128, 
1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=11)]) -> (%8713:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=11)]) + linalg.CPU.RepeatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=39, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=39, solved=0), )] (%8712:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=39)]) -> (%8714:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=39)]) + linalg.CPU.MatMulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=354, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=11, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=362, solved=0), )] (%8698:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=354)], %8713:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=11)]) -> (%8715:tensor<[1, 
16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=362)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=362, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=363, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=362, solved=0), )] (%8715:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=362)], %8716:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=363), constant:[0.088388346]]) -> (%8717:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=362)]) + linalg.CPU.ReduceMinOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=362, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=364, solved=0), )] (%8717:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), 
uuid=362)]) -> (%8718:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=364)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=364, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=365, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=364, solved=0), )] (%8718:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=364)], %8719:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=365), constant:[-20]]) -> (%8720:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=364)]) + linalg.CPU.EqualOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2, solved=0), inputs_1:QuantSpec(Raw(type: UInt16), uuid=366, solved=0), outputs_0:QuantSpec(Raw(type: UInt8), uuid=367, solved=0), )] (%8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8721:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=366), constant:[0]]) -> (%8722:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=367)]) 
+ linalg.CPU.WhereOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=367, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=362, solved=0), inputs_2:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=364, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=364, solved=0), )] (%8722:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=367)], %8717:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=362)], %8720:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=364)]) -> (%8723:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=364)]) + linalg.CPU.SoftmaxOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=364, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=368, solved=0), )] (%8723:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=364)]) -> (%8724:tensor<[1, 16, 
32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=368)]) + linalg.CPU.MatMulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=368, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=39, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=369, solved=0), )] (%8724:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=368)], %8714:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=39)]) -> (%8725:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=369)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=369, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=369, solved=0), )] (%8725:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=369)]) -> (%8726:tensor<[1, 32, 16, 128], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=369)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=369, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=369, solved=0), )] (%8726:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=369)]) -> (%8726:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=369)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=369, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=371, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=370, solved=0))] (%8726:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=369)]) -> (%8727:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=371)]) + 
linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=371, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%8684:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8727:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=371)]) -> (%8728:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=372, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=373, solved=0))] (%8728:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%8729:tensor<[1, 32, 2048], 
UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=372)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=372, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=375, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=374, solved=0))] (%8729:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=372)]) -> (%8730:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=375)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=372, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=377, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=376, solved=0))] (%8729:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=372)]) -> 
(%8731:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=377)]) + linalg.CPU.SigmoidOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=377, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=378, solved=0), )] (%8731:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=377)]) -> (%8732:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=378)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=377, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=378, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=377, solved=0), )] (%8731:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=377)], %8732:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), 
uuid=378)]) -> (%8733:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=377)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=377, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=375, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=377, solved=0), )] (%8733:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=377)], %8730:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=375)]) -> (%8734:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=377)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=377, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=380, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=379, solved=0))] 
(%8734:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=377)]) -> (%8735:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=380)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=380, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%8728:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8735:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=380)]) -> (%8736:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=381, solved=0), 
weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=382, solved=0))] (%8736:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%8737:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=381)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=381, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=384, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=383, solved=0))] (%8737:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=381)]) -> (%8738:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=384)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=381, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=386, solved=0), 
weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=385, solved=0))] (%8737:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=381)]) -> (%8739:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=386)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=381, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=388, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=387, solved=0))] (%8737:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=381)]) -> (%8740:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=388)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=384, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=384, 
solved=0), )] (%8738:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=384)]) -> (%8738:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=384)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=384, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=384, solved=0), )] (%8738:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=384)]) -> (%8741:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=384)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=386, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=386, solved=0), )] (%8739:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=386)]) -> (%8739:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 
65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=386)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=386, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=386, solved=0), )] (%8739:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=386)]) -> (%8742:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=386)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=388, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=388, solved=0), )] (%8740:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=388)]) -> (%8740:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=388)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), 
uuid=388, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=388, solved=0), )] (%8740:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=388)]) -> (%8743:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=388)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=384, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=389, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=390, solved=0))] (%8741:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=384)]) -> (%8744:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=389)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=386, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=391, 
solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=392, solved=0))] (%8742:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=386)]) -> (%8745:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=391)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), )] (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), )] (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, 
scale_type: Float32), uuid=63)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=389, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=389, solved=0), )] (%8744:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=389)]) -> (%8744:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=389)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=389, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=389, solved=0), )] (%8744:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=389)]) -> (%8744:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=389)]) + linalg.CPU.NegOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=389, solved=0), 
outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=389, solved=0), )] (%8744:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=389)]) -> (%8746:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=389)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=389, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=389, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=389, solved=0), )] (%8746:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=389)], %8744:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=389)]) -> (%8747:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=389)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), 
uuid=389, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=389, solved=0), )] (%8747:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=389)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%8748:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=389)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=389, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=389, solved=0), )] (%8744:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=389)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8749:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=389)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=389, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=389, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=389, solved=0), )] (%8749:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=389)], %8748:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=389)]) -> (%8750:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=389)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=391, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=391, solved=0), )] (%8745:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=391)]) -> (%8745:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, 
scale_type: Float32, zero_point_type: Int32), uuid=391)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=391, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=391, solved=0), )] (%8745:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=391)]) -> (%8745:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=391)]) + linalg.CPU.NegOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=391, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=391, solved=0), )] (%8745:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=391)]) -> (%8751:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=391)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=391, solved=0), 
inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=391, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=391, solved=0), )] (%8751:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=391)], %8745:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=391)]) -> (%8752:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=391)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=391, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=391, solved=0), )] (%8752:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=391)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%8753:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, 
scale_type: Float32, zero_point_type: Int32), uuid=391)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=391, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=391, solved=0), )] (%8745:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=391)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8754:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=391)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=391, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=391, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=391, solved=0), )] (%8754:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=391)], %8753:tensor<[1, 8, 32, 128], 
UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=391)]) -> (%8755:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=391)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=391, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=393, solved=0), )] (%8755:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=391)]) -> (%8756:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=393)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=393, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=394, solved=0), )] (%8756:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=393)]) -> (%8757:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=394)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=394, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=394, solved=0), )] (%8757:tensor<[1, 8, 32, 128], 
UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=394)]) -> (%8759:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=394)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=388, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=395, solved=0), )] (%8743:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=388)]) -> (%8760:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=395)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=395, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=396, solved=0), )] (%8760:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=395)]) -> (%8761:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=396)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=12, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=394, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, 
quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=12, solved=0), )] (%8226:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=12)], %8759:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=394)]) -> (%8763:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=12)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=40, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=396, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=40, solved=0), )] (%8227:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=40)], %8761:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=396)]) -> (%8764:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=40)]) + linalg.CPU.RepeatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=12, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 
-128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=12, solved=0), )] (%8763:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=12)]) -> (%8765:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=12)]) + linalg.CPU.RepeatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=40, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=40, solved=0), )] (%8764:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=40)]) -> (%8766:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=40)]) + linalg.CPU.MatMulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=389, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=12, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=397, solved=0), )] (%8750:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=389)], %8765:tensor<[1, 16, 128, 1024], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=12)]) -> (%8767:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=397)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=397, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=398, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=397, solved=0), )] (%8767:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=397)], %8768:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=398), constant:[0.088388346]]) -> (%8769:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=397)]) + linalg.CPU.ReduceMinOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=397, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=399, solved=0), )] (%8769:tensor<[1, 16, 32, 1024], 
UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=397)]) -> (%8770:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=399)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=399, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=400, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=399, solved=0), )] (%8770:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=399)], %8771:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=400), constant:[-20]]) -> (%8772:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=399)]) + linalg.CPU.EqualOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2, solved=0), inputs_1:QuantSpec(Raw(type: UInt16), uuid=401, solved=0), outputs_0:QuantSpec(Raw(type: UInt8), uuid=402, solved=0), )] (%8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8773:tensor<[1], UInt16, 
CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=401), constant:[0]]) -> (%8774:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=402)]) + linalg.CPU.WhereOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=402, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=397, solved=0), inputs_2:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=399, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=399, solved=0), )] (%8774:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=402)], %8769:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=397)], %8772:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=399)]) -> (%8775:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=399)]) + linalg.CPU.SoftmaxOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=399, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=403, solved=0), )] (%8775:tensor<[1, 16, 32, 1024], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=399)]) -> (%8776:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=403)]) + linalg.CPU.MatMulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=403, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=40, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=404, solved=0), )] (%8776:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=403)], %8766:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=40)]) -> (%8777:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=404)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=404, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=404, solved=0), )] (%8777:tensor<[1, 16, 32, 128], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=404)]) -> (%8778:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=404)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=404, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=404, solved=0), )] (%8778:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=404)]) -> (%8778:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=404)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=404, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=406, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=405, solved=0))] (%8778:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=404)]) -> 
(%8779:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=406)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=406, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%8736:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8779:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=406)]) -> (%8780:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=407, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=408, solved=0))] (%8780:tensor<[1, 32, 2048], 
UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%8781:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=407)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=407, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=410, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=409, solved=0))] (%8781:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=407)]) -> (%8782:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=410)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=407, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=412, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=411, solved=0))] (%8781:tensor<[1, 
32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=407)]) -> (%8783:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=412)]) + linalg.CPU.SigmoidOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=412, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=413, solved=0), )] (%8783:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=412)]) -> (%8784:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=413)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=412, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=413, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=412, solved=0), )] (%8783:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=412)], 
%8784:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=413)]) -> (%8785:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=412)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=412, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=410, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=412, solved=0), )] (%8785:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=412)], %8782:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=410)]) -> (%8786:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=412)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=412, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=415, 
solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=414, solved=0))] (%8786:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=412)]) -> (%8787:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=415)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=415, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%8780:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8787:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=415)]) -> (%8788:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=416, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=417, solved=0))] (%8788:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%8789:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=416)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=416, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=419, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=418, solved=0))] (%8789:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=416)]) -> (%8790:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=419)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=416, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=421, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=420, solved=0))] (%8789:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=416)]) -> (%8791:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=421)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=416, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=423, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=422, solved=0))] (%8789:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=416)]) -> (%8792:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=423)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, 
scale_type: Float32, zero_point_type: Int32), uuid=419, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=419, solved=0), )] (%8790:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=419)]) -> (%8790:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=419)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=419, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=419, solved=0), )] (%8790:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=419)]) -> (%8793:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=419)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=421, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=421, solved=0), )] (%8791:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, 
quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=421)]) -> (%8791:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=421)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=421, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=421, solved=0), )] (%8791:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=421)]) -> (%8794:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=421)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=423, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=423, solved=0), )] (%8792:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=423)]) -> (%8792:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=423)]) + linalg.CPU.TransposeOp 
[qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=423, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=423, solved=0), )] (%8792:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=423)]) -> (%8795:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=423)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=419, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=424, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=425, solved=0))] (%8793:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=419)]) -> (%8796:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=424)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, 
scale_type: Float32, zero_point_type: Int32), uuid=421, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=426, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=427, solved=0))] (%8794:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=421)]) -> (%8797:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=426)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), )] (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), )] (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 
65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=424, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=424, solved=0), )] (%8796:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=424)]) -> (%8796:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=424)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=424, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=424, solved=0), )] (%8796:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=424)]) -> (%8796:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=424)]) + linalg.CPU.NegOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, 
quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=424, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=424, solved=0), )] (%8796:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=424)]) -> (%8798:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=424)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=424, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=424, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=424, solved=0), )] (%8798:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=424)], %8796:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=424)]) -> (%8799:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=424)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, 
qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=424, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=424, solved=0), )] (%8799:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=424)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%8800:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=424)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=424, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=424, solved=0), )] (%8796:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=424)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: 
Float32), uuid=65)]) -> (%8801:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=424)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=424, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=424, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=424, solved=0), )] (%8801:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=424)], %8800:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=424)]) -> (%8802:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=424)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=426, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=426, solved=0), )] (%8797:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: 
Float32, zero_point_type: Int32), uuid=426)]) -> (%8797:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=426)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=426, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=426, solved=0), )] (%8797:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=426)]) -> (%8797:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=426)]) + linalg.CPU.NegOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=426, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=426, solved=0), )] (%8797:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=426)]) -> (%8803:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=426)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, 
quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=426, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=426, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=426, solved=0), )] (%8803:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=426)], %8797:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=426)]) -> (%8804:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=426)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=426, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=426, solved=0), )] (%8804:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=426)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: 
UInt16, scale_type: Float32), uuid=63)]) -> (%8805:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=426)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=426, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=426, solved=0), )] (%8797:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=426)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8806:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=426)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=426, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=426, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=426, solved=0), )] (%8806:tensor<[1, 8, 32, 128], 
UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=426)], %8805:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=426)]) -> (%8807:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=426)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=426, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=428, solved=0), )] (%8807:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=426)]) -> (%8808:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=428)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=428, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=429, solved=0), )] (%8808:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=428)]) -> (%8809:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=429)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: 
Float32), uuid=429, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=429, solved=0), )] (%8809:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=429)]) -> (%8811:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=429)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=423, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=430, solved=0), )] (%8795:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=423)]) -> (%8812:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=430)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=430, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=431, solved=0), )] (%8812:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=430)]) -> (%8813:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=431)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=13, 
solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=429, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=13, solved=0), )] (%8228:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=13)], %8811:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=429)]) -> (%8815:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=13)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=41, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=431, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=41, solved=0), )] (%8229:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=41)], %8813:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=431)]) -> (%8816:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=41)]) + linalg.CPU.RepeatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, 
quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=13, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=13, solved=0), )] (%8815:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=13)]) -> (%8817:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=13)]) + linalg.CPU.RepeatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=41, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=41, solved=0), )] (%8816:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=41)]) -> (%8818:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=41)]) + linalg.CPU.MatMulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=424, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=13, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=432, solved=0), )] (%8802:tensor<[1, 16, 32, 128], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=424)], %8817:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=13)]) -> (%8819:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=432)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=432, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=433, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=432, solved=0), )] (%8819:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=432)], %8820:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=433), constant:[0.088388346]]) -> (%8821:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=432)]) + linalg.CPU.ReduceMinOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=432, solved=0), 
outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=434, solved=0), )] (%8821:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=432)]) -> (%8822:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=434)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=434, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=435, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=434, solved=0), )] (%8822:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=434)], %8823:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=435), constant:[-20]]) -> (%8824:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=434)]) + linalg.CPU.EqualOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2, solved=0), inputs_1:QuantSpec(Raw(type: UInt16), uuid=436, solved=0), 
outputs_0:QuantSpec(Raw(type: UInt8), uuid=437, solved=0), )] (%8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8825:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=436), constant:[0]]) -> (%8826:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=437)]) + linalg.CPU.WhereOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=437, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=432, solved=0), inputs_2:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=434, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=434, solved=0), )] (%8826:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=437)], %8821:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=432)], %8824:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=434)]) -> (%8827:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=434)]) + linalg.CPU.SoftmaxOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=434, solved=0), 
outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=438, solved=0), )] (%8827:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=434)]) -> (%8828:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=438)]) + linalg.CPU.MatMulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=438, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=41, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=439, solved=0), )] (%8828:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=438)], %8818:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=41)]) -> (%8829:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=439)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=439, solved=0), 
outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=439, solved=0), )] (%8829:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=439)]) -> (%8830:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=439)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=439, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=439, solved=0), )] (%8830:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=439)]) -> (%8830:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=439)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=439, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=441, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=440, solved=0))] 
(%8830:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=439)]) -> (%8831:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=441)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=441, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%8788:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8831:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=441)]) -> (%8832:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=442, solved=0), 
weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=443, solved=0))] (%8832:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%8833:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=442)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=442, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=445, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=444, solved=0))] (%8833:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=442)]) -> (%8834:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=445)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=442, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=447, solved=0), 
weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=446, solved=0))] (%8833:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=442)]) -> (%8835:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=447)]) + linalg.CPU.SigmoidOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=447, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=448, solved=0), )] (%8835:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=447)]) -> (%8836:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=448)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=447, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=448, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=447, solved=0), )] 
(%8835:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=447)], %8836:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=448)]) -> (%8837:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=447)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=447, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=445, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=447, solved=0), )] (%8837:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=447)], %8834:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=445)]) -> (%8838:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=447)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: 
Float32, zero_point_type: Int32), uuid=447, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=450, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=449, solved=0))] (%8838:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=447)]) -> (%8839:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=450)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=450, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%8832:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8839:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=450)]) -> (%8840:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + linalg.CPU.RMSNormOp 
[qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=451, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=452, solved=0))] (%8840:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%8841:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=451)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=451, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=454, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=453, solved=0))] (%8841:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=451)]) -> (%8842:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=454)]) + linalg.CPU.LinearOp 
[qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=451, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=456, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=455, solved=0))] (%8841:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=451)]) -> (%8843:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=456)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=451, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=458, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=457, solved=0))] (%8841:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=451)]) -> (%8844:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=458)]) + 
linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=454, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=454, solved=0), )] (%8842:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=454)]) -> (%8842:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=454)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=454, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=454, solved=0), )] (%8842:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=454)]) -> (%8845:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=454)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=456, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, 
quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=456, solved=0), )] (%8843:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=456)]) -> (%8843:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=456)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=456, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=456, solved=0), )] (%8843:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=456)]) -> (%8846:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=456)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=458, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=458, solved=0), )] (%8844:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=458)]) -> (%8844:tensor<[1, 32, 8, 128], 
UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=458)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=458, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=458, solved=0), )] (%8844:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=458)]) -> (%8847:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=458)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=454, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=459, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=460, solved=0))] (%8845:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=454)]) -> (%8848:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=459)]) + 
linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=456, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=461, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=462, solved=0))] (%8846:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=456)]) -> (%8849:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=461)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), )] (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, 
quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), )] (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=459, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=459, solved=0), )] (%8848:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=459)]) -> (%8848:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=459)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=459, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=459, solved=0), )] (%8848:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=459)]) -> (%8848:tensor<[1, 16, 32, 128], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=459)]) + linalg.CPU.NegOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=459, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=459, solved=0), )] (%8848:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=459)]) -> (%8850:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=459)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=459, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=459, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=459, solved=0), )] (%8850:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=459)], %8848:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=459)]) -> (%8851:tensor<[1, 16, 32, 
128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=459)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=459, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=459, solved=0), )] (%8851:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=459)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%8852:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=459)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=459, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=459, solved=0), )] (%8848:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: 
UInt16, scale_type: Float32, zero_point_type: Int32), uuid=459)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8853:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=459)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=459, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=459, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=459, solved=0), )] (%8853:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=459)], %8852:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=459)]) -> (%8854:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=459)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=461, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: 
Float32, zero_point_type: Int32), uuid=461, solved=0), )] (%8849:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=461)]) -> (%8849:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=461)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=461, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=461, solved=0), )] (%8849:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=461)]) -> (%8849:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=461)]) + linalg.CPU.NegOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=461, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=461, solved=0), )] (%8849:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=461)]) -> (%8855:tensor<[1, 8, 32, 64], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=461)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=461, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=461, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=461, solved=0), )] (%8855:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=461)], %8849:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=461)]) -> (%8856:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=461)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=461, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=461, solved=0), )] (%8856:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, 
quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=461)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%8857:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=461)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=461, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=461, solved=0), )] (%8849:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=461)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8858:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=461)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=461, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: 
Int32), uuid=461, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=461, solved=0), )] (%8858:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=461)], %8857:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=461)]) -> (%8859:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=461)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=461, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=463, solved=0), )] (%8859:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=461)]) -> (%8860:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=463)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=463, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=464, solved=0), )] (%8860:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=463)]) -> (%8861:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), 
uuid=464)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=464, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=464, solved=0), )] (%8861:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=464)]) -> (%8863:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=464)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=458, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=465, solved=0), )] (%8847:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=458)]) -> (%8864:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=465)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=465, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=466, solved=0), )] (%8864:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=465)]) -> (%8865:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=466)]) + 
linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=14, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=464, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=14, solved=0), )] (%8230:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=14)], %8863:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=464)]) -> (%8867:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=14)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=42, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=466, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=42, solved=0), )] (%8231:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=42)], %8865:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=466)]) -> (%8868:tensor<[1, 8, 1024, 128], 
UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=42)]) + linalg.CPU.RepeatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=14, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=14, solved=0), )] (%8867:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=14)]) -> (%8869:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=14)]) + linalg.CPU.RepeatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=42, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=42, solved=0), )] (%8868:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=42)]) -> (%8870:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=42)]) + linalg.CPU.MatMulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=459, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=14, solved=0), 
outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=467, solved=0), )] (%8854:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=459)], %8869:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=14)]) -> (%8871:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=467)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=467, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=468, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=467, solved=0), )] (%8871:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=467)], %8872:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=468), constant:[0.088388346]]) -> (%8873:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=467)]) + linalg.CPU.ReduceMinOp [qnn_graph_name:model.0.s32, 
qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=467, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=469, solved=0), )] (%8873:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=467)]) -> (%8874:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=469)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=469, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=470, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=469, solved=0), )] (%8874:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=469)], %8875:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=470), constant:[-20]]) -> (%8876:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=469)]) + linalg.CPU.EqualOp 
[qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2, solved=0), inputs_1:QuantSpec(Raw(type: UInt16), uuid=471, solved=0), outputs_0:QuantSpec(Raw(type: UInt8), uuid=472, solved=0), )] (%8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8877:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=471), constant:[0]]) -> (%8878:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=472)]) + linalg.CPU.WhereOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=472, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=467, solved=0), inputs_2:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=469, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=469, solved=0), )] (%8878:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=472)], %8873:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=467)], %8876:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=469)]) -> (%8879:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=469)]) + linalg.CPU.SoftmaxOp [qnn_graph_name:model.0.s32, 
qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=469, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=473, solved=0), )] (%8879:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=469)]) -> (%8880:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=473)]) + linalg.CPU.MatMulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=473, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=42, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=474, solved=0), )] (%8880:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=473)], %8870:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=42)]) -> (%8881:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=474)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, 
qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=474, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=474, solved=0), )] (%8881:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=474)]) -> (%8882:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=474)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=474, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=474, solved=0), )] (%8882:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=474)]) -> (%8882:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=474)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=474, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: 
Int32), uuid=476, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=475, solved=0))] (%8882:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=474)]) -> (%8883:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=476)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=476, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%8840:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8883:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=476)]) -> (%8884:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: 
Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=477, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=478, solved=0))] (%8884:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%8885:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=477)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=477, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=480, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=479, solved=0))] (%8885:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=477)]) -> (%8886:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=480)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=477, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=482, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=481, solved=0))] (%8885:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=477)]) -> (%8887:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=482)]) + linalg.CPU.SigmoidOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=482, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=483, solved=0), )] (%8887:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=482)]) -> (%8888:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=483)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=482, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=483, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=482, solved=0), )] (%8887:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=482)], %8888:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=483)]) -> (%8889:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=482)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=482, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=480, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=482, solved=0), )] (%8889:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=482)], %8886:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=480)]) -> (%8890:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=482)]) + 
linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=482, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=485, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=484, solved=0))] (%8890:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=482)]) -> (%8891:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=485)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=485, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%8884:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8891:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=485)]) -> (%8892:tensor<[1, 
32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=486, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=487, solved=0))] (%8892:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%8893:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=486)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=486, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=489, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=488, solved=0))] (%8893:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=486)]) -> (%8894:tensor<[1, 32, 2048], 
UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=489)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=486, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=491, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=490, solved=0))] (%8893:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=486)]) -> (%8895:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=491)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=486, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=493, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=492, solved=0))] (%8893:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=486)]) -> 
(%8896:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=493)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=489, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=489, solved=0), )] (%8894:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=489)]) -> (%8894:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=489)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=489, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=489, solved=0), )] (%8894:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=489)]) -> (%8897:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=489)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, 
quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=491, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=491, solved=0), )] (%8895:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=491)]) -> (%8895:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=491)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=491, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=491, solved=0), )] (%8895:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=491)]) -> (%8898:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=491)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=493, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=493, 
solved=0), )] (%8896:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=493)]) -> (%8896:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=493)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=493, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=493, solved=0), )] (%8896:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=493)]) -> (%8899:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=493)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=489, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=494, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=495, solved=0))] (%8897:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: 
Float32, zero_point_type: Int32), uuid=489)]) -> (%8900:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=494)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=491, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=496, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=497, solved=0))] (%8898:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=491)]) -> (%8901:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=496)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), )] (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) + linalg.CPU.ViewOp 
[qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), )] (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=494, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=494, solved=0), )] (%8900:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=494)]) -> (%8900:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=494)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=494, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=494, solved=0), )] (%8900:tensor<[1, 16, 32, 128], 
UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=494)]) -> (%8900:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=494)]) + linalg.CPU.NegOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=494, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=494, solved=0), )] (%8900:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=494)]) -> (%8902:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=494)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=494, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=494, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=494, solved=0), )] (%8902:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=494)], 
%8900:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=494)]) -> (%8903:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=494)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=494, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=494, solved=0), )] (%8903:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=494)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%8904:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=494)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=494, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, 
quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=494, solved=0), )] (%8900:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=494)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8905:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=494)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=494, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=494, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=494, solved=0), )] (%8905:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=494)], %8904:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=494)]) -> (%8906:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=494)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, 
quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=496, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=496, solved=0), )] (%8901:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=496)]) -> (%8901:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=496)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=496, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=496, solved=0), )] (%8901:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=496)]) -> (%8901:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=496)]) + linalg.CPU.NegOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=496, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=496, solved=0), 
)] (%8901:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=496)]) -> (%8907:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=496)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=496, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=496, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=496, solved=0), )] (%8907:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=496)], %8901:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=496)]) -> (%8908:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=496)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=496, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), 
outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=496, solved=0), )] (%8908:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=496)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%8909:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=496)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=496, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=496, solved=0), )] (%8901:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=496)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8910:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=496)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, 
quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=496, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=496, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=496, solved=0), )] (%8910:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=496)], %8909:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=496)]) -> (%8911:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=496)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=496, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=498, solved=0), )] (%8911:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=496)]) -> (%8912:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=498)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=498, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, 
scale_type: Float32), uuid=499, solved=0), )] (%8912:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=498)]) -> (%8913:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=499)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=499, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=499, solved=0), )] (%8913:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=499)]) -> (%8915:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=499)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=493, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=500, solved=0), )] (%8899:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=493)]) -> (%8916:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=500)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=500, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=501, solved=0), 
)] (%8916:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=500)]) -> (%8917:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=501)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=15, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=499, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=15, solved=0), )] (%8232:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=15)], %8915:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=499)]) -> (%8919:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=15)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=43, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=501, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=43, solved=0), )] (%8233:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, 
quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=43)], %8917:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=501)]) -> (%8920:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=43)]) + linalg.CPU.RepeatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=15, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=15, solved=0), )] (%8919:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=15)]) -> (%8921:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=15)]) + linalg.CPU.RepeatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=43, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=43, solved=0), )] (%8920:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=43)]) -> (%8922:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=43)]) + linalg.CPU.MatMulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, 
quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=494, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=15, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=502, solved=0), )] (%8906:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=494)], %8921:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=15)]) -> (%8923:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=502)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=502, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=503, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=502, solved=0), )] (%8923:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=502)], %8924:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: 
Int32), uuid=503), constant:[0.088388346]]) -> (%8925:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=502)]) + linalg.CPU.ReduceMinOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=502, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=504, solved=0), )] (%8925:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=502)]) -> (%8926:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=504)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=504, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=505, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=504, solved=0), )] (%8926:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=504)], %8927:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: 
Float32, zero_point_type: Int32), uuid=505), constant:[-20]]) -> (%8928:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=504)]) + linalg.CPU.EqualOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2, solved=0), inputs_1:QuantSpec(Raw(type: UInt16), uuid=506, solved=0), outputs_0:QuantSpec(Raw(type: UInt8), uuid=507, solved=0), )] (%8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8929:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=506), constant:[0]]) -> (%8930:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=507)]) + linalg.CPU.WhereOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=507, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=502, solved=0), inputs_2:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=504, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=504, solved=0), )] (%8930:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=507)], %8925:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=502)], %8928:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), 
uuid=504)]) -> (%8931:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=504)]) + linalg.CPU.SoftmaxOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=504, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=508, solved=0), )] (%8931:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=504)]) -> (%8932:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=508)]) + linalg.CPU.MatMulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=508, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=43, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=509, solved=0), )] (%8932:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=508)], %8922:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=43)]) -> 
(%8933:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=509)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=509, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=509, solved=0), )] (%8933:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=509)]) -> (%8934:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=509)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=509, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=509, solved=0), )] (%8934:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=509)]) -> (%8934:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=509)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, 
quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=509, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=511, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=510, solved=0))] (%8934:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=509)]) -> (%8935:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=511)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=511, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%8892:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8935:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=511)]) -> (%8936:tensor<[1, 32, 2048], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=512, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=513, solved=0))] (%8936:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%8937:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=512)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=512, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=515, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=514, solved=0))] (%8937:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=512)]) -> (%8938:tensor<[1, 32, 6144], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=515)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=512, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=517, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=516, solved=0))] (%8937:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=512)]) -> (%8939:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=517)]) + linalg.CPU.SigmoidOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=517, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=518, solved=0), )] (%8939:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=517)]) -> (%8940:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=518)]) + 
linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=517, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=518, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=517, solved=0), )] (%8939:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=517)], %8940:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=518)]) -> (%8941:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=517)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=517, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=515, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=517, solved=0), )] (%8941:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=517)], %8938:tensor<[1, 32, 6144], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=515)]) -> (%8942:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=517)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=517, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=520, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=519, solved=0))] (%8942:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=517)]) -> (%8943:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=520)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=520, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%8936:tensor<[1, 32, 2048], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8943:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=520)]) -> (%8944:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=521, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=522, solved=0))] (%8944:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%8945:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=521)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=521, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=524, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: 
-8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=523, solved=0))] (%8945:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=521)]) -> (%8946:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=524)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=521, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=526, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=525, solved=0))] (%8945:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=521)]) -> (%8947:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=526)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=521, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=528, solved=0), 
weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=527, solved=0))] (%8945:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=521)]) -> (%8948:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=528)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=524, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=524, solved=0), )] (%8946:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=524)]) -> (%8946:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=524)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=524, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=524, solved=0), )] (%8946:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: 
Float32, zero_point_type: Int32), uuid=524)]) -> (%8949:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=524)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=526, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=526, solved=0), )] (%8947:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=526)]) -> (%8947:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=526)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=526, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=526, solved=0), )] (%8947:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=526)]) -> (%8950:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=526)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, 
quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=528, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=528, solved=0), )] (%8948:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=528)]) -> (%8948:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=528)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=528, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=528, solved=0), )] (%8948:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=528)]) -> (%8951:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=528)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=524, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=529, 
solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=530, solved=0))] (%8949:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=524)]) -> (%8952:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=529)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=526, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=531, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=532, solved=0))] (%8950:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=526)]) -> (%8953:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=531)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), )] (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), )] (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=529, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=529, solved=0), )] (%8952:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=529)]) -> (%8952:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=529)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, 
quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=529, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=529, solved=0), )] (%8952:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=529)]) -> (%8952:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=529)]) + linalg.CPU.NegOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=529, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=529, solved=0), )] (%8952:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=529)]) -> (%8954:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=529)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=529, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=529, 
solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=529, solved=0), )] (%8954:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=529)], %8952:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=529)]) -> (%8955:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=529)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=529, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=529, solved=0), )] (%8955:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=529)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%8956:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=529)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, 
quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=529, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=529, solved=0), )] (%8952:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=529)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8957:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=529)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=529, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=529, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=529, solved=0), )] (%8957:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=529)], %8956:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: 
Float32, zero_point_type: Int32), uuid=529)]) -> (%8958:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=529)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=531, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=531, solved=0), )] (%8953:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=531)]) -> (%8953:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=531)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=531, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=531, solved=0), )] (%8953:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=531)]) -> (%8953:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=531)]) + linalg.CPU.NegOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, 
quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=531, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=531, solved=0), )] (%8953:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=531)]) -> (%8959:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=531)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=531, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=531, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=531, solved=0), )] (%8959:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=531)], %8953:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=531)]) -> (%8960:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=531)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, 
qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=531, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=531, solved=0), )] (%8960:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=531)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%8961:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=531)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=531, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=531, solved=0), )] (%8953:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=531)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: 
Float32), uuid=65)]) -> (%8962:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=531)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=531, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=531, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=531, solved=0), )] (%8962:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=531)], %8961:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=531)]) -> (%8963:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=531)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=531, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=533, solved=0), )] (%8963:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=531)]) -> (%8964:tensor<[1, 8, 32, 128], Float16, 
CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=533)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=533, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=534, solved=0), )] (%8964:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=533)]) -> (%8965:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=534)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=534, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=534, solved=0), )] (%8965:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=534)]) -> (%8967:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=534)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=528, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=535, solved=0), )] (%8951:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=528)]) -> (%8968:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: 
Float16), uuid=535)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=535, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=536, solved=0), )] (%8968:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=535)]) -> (%8969:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=536)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=16, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=534, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=16, solved=0), )] (%8234:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=16)], %8967:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=534)]) -> (%8971:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=16)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=44, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, 
quant_to_type: UInt8, scale_type: Float32), uuid=536, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=44, solved=0), )] (%8235:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=44)], %8969:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=536)]) -> (%8972:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=44)]) + linalg.CPU.RepeatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=16, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=16, solved=0), )] (%8971:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=16)]) -> (%8973:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=16)]) + linalg.CPU.RepeatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=44, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=44, solved=0), )] (%8972:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, 
scale_type: Float32), uuid=44)]) -> (%8974:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=44)]) + linalg.CPU.MatMulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=529, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=16, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=537, solved=0), )] (%8958:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=529)], %8973:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=16)]) -> (%8975:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=537)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=537, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=538, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=537, solved=0), )] (%8975:tensor<[1, 16, 32, 1024], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=537)], %8976:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=538), constant:[0.088388346]]) -> (%8977:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=537)]) + linalg.CPU.ReduceMinOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=537, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=539, solved=0), )] (%8977:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=537)]) -> (%8978:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=539)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=539, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=540, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=539, solved=0), )] (%8978:tensor<[1, 16, 
32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=539)], %8979:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=540), constant:[-20]]) -> (%8980:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=539)]) + linalg.CPU.EqualOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2, solved=0), inputs_1:QuantSpec(Raw(type: UInt16), uuid=541, solved=0), outputs_0:QuantSpec(Raw(type: UInt8), uuid=542, solved=0), )] (%8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %8981:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=541), constant:[0]]) -> (%8982:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=542)]) + linalg.CPU.WhereOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=542, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=537, solved=0), inputs_2:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=539, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=539, solved=0), )] (%8982:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=542)], %8977:tensor<[1, 16, 32, 1024], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=537)], %8980:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=539)]) -> (%8983:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=539)]) + linalg.CPU.SoftmaxOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=539, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=543, solved=0), )] (%8983:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=539)]) -> (%8984:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=543)]) + linalg.CPU.MatMulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=543, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=44, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=544, solved=0), )] (%8984:tensor<[1, 16, 32, 1024], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=543)], %8974:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=44)]) -> (%8985:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=544)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=544, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=544, solved=0), )] (%8985:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=544)]) -> (%8986:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=544)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=544, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=544, solved=0), )] (%8986:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=544)]) -> 
(%8986:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=544)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=544, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=546, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=545, solved=0))] (%8986:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=544)]) -> (%8987:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=546)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=546, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%8944:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8987:tensor<[1, 32, 
2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=546)]) -> (%8988:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=547, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=548, solved=0))] (%8988:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%8989:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=547)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=547, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=550, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=549, solved=0))] (%8989:tensor<[1, 32, 2048], 
UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=547)]) -> (%8990:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=550)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=547, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=552, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=551, solved=0))] (%8989:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=547)]) -> (%8991:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=552)]) + linalg.CPU.SigmoidOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=552, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=553, solved=0), )] (%8991:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), 
uuid=552)]) -> (%8992:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=553)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=552, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=553, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=552, solved=0), )] (%8991:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=552)], %8992:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=553)]) -> (%8993:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=552)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=552, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=550, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=552, solved=0), )] (%8993:tensor<[1, 32, 6144], 
UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=552)], %8990:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=550)]) -> (%8994:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=552)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=552, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=555, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=554, solved=0))] (%8994:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=552)]) -> (%8995:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=555)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=555, solved=0), 
outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%8988:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %8995:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=555)]) -> (%8996:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=556, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=557, solved=0))] (%8996:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%8997:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=556)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=556, 
solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=559, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=558, solved=0))] (%8997:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=556)]) -> (%8998:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=559)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=556, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=561, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=560, solved=0))] (%8997:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=556)]) -> (%8999:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=561)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: 
Int32), uuid=556, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=563, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=562, solved=0))] (%8997:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=556)]) -> (%9000:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=563)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=559, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=559, solved=0), )] (%8998:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=559)]) -> (%8998:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=559)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=559, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: 
Int32), uuid=559, solved=0), )] (%8998:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=559)]) -> (%9001:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=559)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=561, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=561, solved=0), )] (%8999:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=561)]) -> (%8999:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=561)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=561, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=561, solved=0), )] (%8999:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=561)]) -> (%9002:tensor<[1, 8, 32, 128], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=561)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=563, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=563, solved=0), )] (%9000:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=563)]) -> (%9000:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=563)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=563, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=563, solved=0), )] (%9000:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=563)]) -> (%9003:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=563)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, 
quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=559, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=564, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=565, solved=0))] (%9001:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=559)]) -> (%9004:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=564)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=561, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=566, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=567, solved=0))] (%9002:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=561)]) -> (%9005:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=566)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: 
UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), )] (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), )] (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=564, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=564, solved=0), )] (%9004:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=564)]) -> (%9004:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, 
quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=564)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=564, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=564, solved=0), )] (%9004:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=564)]) -> (%9004:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=564)]) + linalg.CPU.NegOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=564, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=564, solved=0), )] (%9004:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=564)]) -> (%9006:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=564)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: 
Int32), uuid=564, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=564, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=564, solved=0), )] (%9006:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=564)], %9004:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=564)]) -> (%9007:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=564)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=564, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=564, solved=0), )] (%9007:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=564)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%9008:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, 
quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=564)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=564, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=564, solved=0), )] (%9004:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=564)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%9009:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=564)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=564, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=564, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=564, solved=0), )] (%9009:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), 
uuid=564)], %9008:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=564)]) -> (%9010:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=564)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=566, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=566, solved=0), )] (%9005:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=566)]) -> (%9005:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=566)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=566, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=566, solved=0), )] (%9005:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=566)]) -> (%9005:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 
65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=566)]) + linalg.CPU.NegOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=566, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=566, solved=0), )] (%9005:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=566)]) -> (%9011:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=566)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=566, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=566, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=566, solved=0), )] (%9011:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=566)], %9005:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=566)]) -> (%9012:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 
0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=566)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=566, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=566, solved=0), )] (%9012:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=566)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%9013:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=566)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=566, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=566, solved=0), )] (%9005:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=566)], 
%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%9014:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=566)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=566, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=566, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=566, solved=0), )] (%9014:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=566)], %9013:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=566)]) -> (%9015:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=566)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=566, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=568, solved=0), )] (%9015:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, 
quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=566)]) -> (%9016:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=568)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=568, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=569, solved=0), )] (%9016:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=568)]) -> (%9017:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=569)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=569, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=569, solved=0), )] (%9017:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=569)]) -> (%9019:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=569)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=563, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=570, solved=0), )] (%9003:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, 
scale_type: Float32, zero_point_type: Int32), uuid=563)]) -> (%9020:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=570)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=570, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=571, solved=0), )] (%9020:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=570)]) -> (%9021:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=571)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=17, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=569, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=17, solved=0), )] (%8236:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=17)], %9019:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=569)]) -> (%9023:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=17)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 
255, quant_to_type: UInt8, scale_type: Float32), uuid=45, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=571, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=45, solved=0), )] (%8237:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=45)], %9021:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=571)]) -> (%9024:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=45)]) + linalg.CPU.RepeatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=17, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=17, solved=0), )] (%9023:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=17)]) -> (%9025:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=17)]) + linalg.CPU.RepeatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=45, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=45, solved=0), )] 
(%9024:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=45)]) -> (%9026:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=45)]) + linalg.CPU.MatMulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=564, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=17, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=572, solved=0), )] (%9010:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=564)], %9025:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=17)]) -> (%9027:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=572)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=572, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=573, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, 
scale_type: Float32, zero_point_type: Int32), uuid=572, solved=0), )] (%9027:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=572)], %9028:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=573), constant:[0.088388346]]) -> (%9029:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=572)]) + linalg.CPU.ReduceMinOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=572, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=574, solved=0), )] (%9029:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=572)]) -> (%9030:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=574)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=574, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=575, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 
65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=574, solved=0), )] (%9030:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=574)], %9031:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=575), constant:[-20]]) -> (%9032:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=574)]) + linalg.CPU.EqualOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2, solved=0), inputs_1:QuantSpec(Raw(type: UInt16), uuid=576, solved=0), outputs_0:QuantSpec(Raw(type: UInt8), uuid=577, solved=0), )] (%8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %9033:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=576), constant:[0]]) -> (%9034:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=577)]) + linalg.CPU.WhereOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=577, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=572, solved=0), inputs_2:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=574, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=574, solved=0), )] (%9034:tensor<[1, 1, 32, 1024], UInt8, 
CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=577)], %9029:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=572)], %9032:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=574)]) -> (%9035:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=574)]) + linalg.CPU.SoftmaxOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=574, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=578, solved=0), )] (%9035:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=574)]) -> (%9036:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=578)]) + linalg.CPU.MatMulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=578, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=45, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: 
Float32, zero_point_type: Int32), uuid=579, solved=0), )] (%9036:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=578)], %9026:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=45)]) -> (%9037:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=579)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=579, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=579, solved=0), )] (%9037:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=579)]) -> (%9038:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=579)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=579, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=579, solved=0), )] (%9038:tensor<[1, 32, 16, 128], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=579)]) -> (%9038:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=579)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=579, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=581, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=580, solved=0))] (%9038:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=579)]) -> (%9039:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=581)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=581, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%8996:tensor<[1, 32, 2048], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %9039:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=581)]) -> (%9040:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=582, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=583, solved=0))] (%9040:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%9041:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=582)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=582, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=585, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: 
-8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=584, solved=0))] (%9041:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=582)]) -> (%9042:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=585)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=582, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=587, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=586, solved=0))] (%9041:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=582)]) -> (%9043:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=587)]) + linalg.CPU.SigmoidOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=587, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=588, solved=0), )] (%9043:tensor<[1, 32, 
6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=587)]) -> (%9044:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=588)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=587, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=588, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=587, solved=0), )] (%9043:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=587)], %9044:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=588)]) -> (%9045:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=587)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=587, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=585, solved=0), 
outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=587, solved=0), )] (%9045:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=587)], %9042:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=585)]) -> (%9046:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=587)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=587, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=590, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=589, solved=0))] (%9046:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=587)]) -> (%9047:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=590)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=590, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%9040:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %9047:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=590)]) -> (%9048:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=591, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=592, solved=0))] (%9048:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%9049:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=591)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, 
qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=591, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=594, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=593, solved=0))] (%9049:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=591)]) -> (%9050:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=594)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=591, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=596, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=595, solved=0))] (%9049:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=591)]) -> (%9051:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=596)]) + linalg.CPU.LinearOp 
[qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=591, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=598, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=597, solved=0))] (%9049:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=591)]) -> (%9052:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=598)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=594, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=594, solved=0), )] (%9050:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=594)]) -> (%9050:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=594)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, 
quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=594, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=594, solved=0), )] (%9050:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=594)]) -> (%9053:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=594)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=596, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=596, solved=0), )] (%9051:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=596)]) -> (%9051:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=596)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=596, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=596, solved=0), )] (%9051:tensor<[1, 32, 8, 128], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=596)]) -> (%9054:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=596)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=598, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=598, solved=0), )] (%9052:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=598)]) -> (%9052:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=598)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=598, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=598, solved=0), )] (%9052:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=598)]) -> (%9055:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=598)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=594, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=599, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=600, solved=0))] (%9053:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=594)]) -> (%9056:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=599)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=596, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=601, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=602, solved=0))] (%9054:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=596)]) -> (%9057:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: 
Int32), uuid=601)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), )] (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), )] (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=599, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=599, solved=0), )] (%9056:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, 
quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=599)]) -> (%9056:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=599)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=599, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=599, solved=0), )] (%9056:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=599)]) -> (%9056:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=599)]) + linalg.CPU.NegOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=599, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=599, solved=0), )] (%9056:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=599)]) -> (%9058:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=599)]) + linalg.CPU.ConcatOp 
[qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=599, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=599, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=599, solved=0), )] (%9058:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=599)], %9056:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=599)]) -> (%9059:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=599)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=599, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=599, solved=0), )] (%9059:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=599)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%9060:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=599)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=599, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=599, solved=0), )] (%9056:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=599)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%9061:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=599)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=599, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=599, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=599, solved=0), )] (%9061:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=599)], %9060:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=599)]) -> (%9062:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=599)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=601, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=601, solved=0), )] (%9057:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=601)]) -> (%9057:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=601)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=601, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=601, solved=0), )] (%9057:tensor<[1, 8, 32, 128], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=601)]) -> (%9057:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=601)]) + linalg.CPU.NegOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=601, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=601, solved=0), )] (%9057:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=601)]) -> (%9063:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=601)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=601, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=601, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=601, solved=0), )] (%9063:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=601)], %9057:tensor<[1, 8, 32, 128], 
UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=601)]) -> (%9064:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=601)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=601, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=601, solved=0), )] (%9064:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=601)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%9065:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=601)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=601, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, 
scale_type: Float32, zero_point_type: Int32), uuid=601, solved=0), )] (%9057:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=601)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%9066:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=601)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=601, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=601, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=601, solved=0), )] (%9066:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=601)], %9065:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=601)]) -> (%9067:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=601)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, 
quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=601, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=603, solved=0), )] (%9067:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=601)]) -> (%9068:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=603)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=603, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=604, solved=0), )] (%9068:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=603)]) -> (%9069:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=604)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=604, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=604, solved=0), )] (%9069:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=604)]) -> (%9071:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=604)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, 
scale_type: Float32, zero_point_type: Int32), uuid=598, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=605, solved=0), )] (%9055:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=598)]) -> (%9072:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=605)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=605, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=606, solved=0), )] (%9072:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=605)]) -> (%9073:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=606)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=18, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=604, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=18, solved=0), )] (%8238:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=18)], %9071:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=604)]) -> (%9075:tensor<[1, 8, 128, 1024], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=18)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=46, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=606, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=46, solved=0), )] (%8239:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=46)], %9073:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=606)]) -> (%9076:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=46)]) + linalg.CPU.RepeatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=18, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=18, solved=0), )] (%9075:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=18)]) -> (%9077:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=18)]) + linalg.CPU.RepeatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, 
quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=46, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=46, solved=0), )] (%9076:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=46)]) -> (%9078:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=46)]) + linalg.CPU.MatMulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=599, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=18, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=607, solved=0), )] (%9062:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=599)], %9077:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=18)]) -> (%9079:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=607)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=607, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=608, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=607, solved=0), )] (%9079:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=607)], %9080:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=608), constant:[0.088388346]]) -> (%9081:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=607)]) + linalg.CPU.ReduceMinOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=607, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=609, solved=0), )] (%9081:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=607)]) -> (%9082:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=609)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: 
UInt16, scale_type: Float32, zero_point_type: Int32), uuid=609, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=610, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=609, solved=0), )] (%9082:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=609)], %9083:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=610), constant:[-20]]) -> (%9084:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=609)]) + linalg.CPU.EqualOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2, solved=0), inputs_1:QuantSpec(Raw(type: UInt16), uuid=611, solved=0), outputs_0:QuantSpec(Raw(type: UInt8), uuid=612, solved=0), )] (%8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %9085:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=611), constant:[0]]) -> (%9086:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=612)]) + linalg.CPU.WhereOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=612, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=607, solved=0), inputs_2:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: 
UInt16, scale_type: Float32, zero_point_type: Int32), uuid=609, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=609, solved=0), )] (%9086:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=612)], %9081:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=607)], %9084:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=609)]) -> (%9087:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=609)]) + linalg.CPU.SoftmaxOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=609, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=613, solved=0), )] (%9087:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=609)]) -> (%9088:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=613)]) + linalg.CPU.MatMulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=613, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=46, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=614, solved=0), )] (%9088:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=613)], %9078:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=46)]) -> (%9089:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=614)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=614, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=614, solved=0), )] (%9089:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=614)]) -> (%9090:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=614)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), 
uuid=614, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=614, solved=0), )] (%9090:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=614)]) -> (%9090:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=614)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=614, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=616, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=615, solved=0))] (%9090:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=614)]) -> (%9091:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=616)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=616, 
solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%9048:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %9091:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=616)]) -> (%9092:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=617, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=618, solved=0))] (%9092:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%9093:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=617)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), 
uuid=617, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=620, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=619, solved=0))] (%9093:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=617)]) -> (%9094:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=620)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=617, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=622, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=621, solved=0))] (%9093:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=617)]) -> (%9095:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=622)]) + linalg.CPU.SigmoidOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=622, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=623, solved=0), )] (%9095:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=622)]) -> (%9096:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=623)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=622, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=623, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=622, solved=0), )] (%9095:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=622)], %9096:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=623)]) -> (%9097:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=622)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: 
Float32, zero_point_type: Int32), uuid=622, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=620, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=622, solved=0), )] (%9097:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=622)], %9094:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=620)]) -> (%9098:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=622)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=622, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=625, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=624, solved=0))] (%9098:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=622)]) -> (%9099:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=625)]) + linalg.CPU.AddOp 
[qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=625, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%9092:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %9099:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=625)]) -> (%9100:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=626, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=627, solved=0))] (%9100:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%9101:tensor<[1, 32, 2048], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=626)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=626, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=629, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=628, solved=0))] (%9101:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=626)]) -> (%9102:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=629)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=626, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=631, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=630, solved=0))] (%9101:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=626)]) -> (%9103:tensor<[1, 32, 1024], 
UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=631)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=626, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=633, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=632, solved=0))] (%9101:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=626)]) -> (%9104:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=633)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=629, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=629, solved=0), )] (%9102:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=629)]) -> (%9102:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), 
uuid=629)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=629, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=629, solved=0), )] (%9102:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=629)]) -> (%9105:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=629)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=631, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=631, solved=0), )] (%9103:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=631)]) -> (%9103:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=631)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=631, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 
65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=631, solved=0), )] (%9103:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=631)]) -> (%9106:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=631)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=633, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=633, solved=0), )] (%9104:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=633)]) -> (%9104:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=633)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=633, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=633, solved=0), )] (%9104:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=633)]) -> (%9107:tensor<[1, 8, 32, 128], 
UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=633)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=629, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=634, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=635, solved=0))] (%9105:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=629)]) -> (%9108:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=634)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=631, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=636, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=637, solved=0))] (%9106:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=631)]) -> (%9109:tensor<[1, 8, 32, 128], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=636)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), )] (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), )] (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=634, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: 
Int32), uuid=634, solved=0), )] (%9108:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=634)]) -> (%9108:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=634)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=634, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=634, solved=0), )] (%9108:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=634)]) -> (%9108:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=634)]) + linalg.CPU.NegOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=634, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=634, solved=0), )] (%9108:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=634)]) -> (%9110:tensor<[1, 16, 32, 64], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=634)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=634, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=634, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=634, solved=0), )] (%9110:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=634)], %9108:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=634)]) -> (%9111:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=634)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=634, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=634, solved=0), )] (%9111:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, 
quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=634)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%9112:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=634)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=634, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=634, solved=0), )] (%9108:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=634)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%9113:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=634)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=634, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=634, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=634, solved=0), )] (%9113:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=634)], %9112:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=634)]) -> (%9114:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=634)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=636, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=636, solved=0), )] (%9109:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=636)]) -> (%9109:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=636)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=636, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, 
scale_type: Float32, zero_point_type: Int32), uuid=636, solved=0), )] (%9109:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=636)]) -> (%9109:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=636)]) + linalg.CPU.NegOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=636, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=636, solved=0), )] (%9109:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=636)]) -> (%9115:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=636)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=636, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=636, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=636, solved=0), )] (%9115:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 
65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=636)], %9109:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=636)]) -> (%9116:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=636)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=636, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=636, solved=0), )] (%9116:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=636)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%9117:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=636)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=636, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), 
uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=636, solved=0), )] (%9109:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=636)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%9118:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=636)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=636, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=636, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=636, solved=0), )] (%9118:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=636)], %9117:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=636)]) -> (%9119:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=636)]) + linalg.CPU.CastTypeOp 
[qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=636, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=638, solved=0), )] (%9119:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=636)]) -> (%9120:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=638)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=638, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=639, solved=0), )] (%9120:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=638)]) -> (%9121:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=639)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=639, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=639, solved=0), )] (%9121:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=639)]) -> (%9123:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=639)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, 
qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=633, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=640, solved=0), )] (%9107:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=633)]) -> (%9124:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=640)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=640, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=641, solved=0), )] (%9124:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=640)]) -> (%9125:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=641)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=19, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=639, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=19, solved=0), )] (%8240:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=19)], %9123:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 
0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=639)]) -> (%9127:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=19)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=47, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=641, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=47, solved=0), )] (%8241:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=47)], %9125:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=641)]) -> (%9128:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=47)]) + linalg.CPU.RepeatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=19, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=19, solved=0), )] (%9127:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=19)]) -> (%9129:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, 
quant_to_type: Int8, scale_type: Float32), uuid=19)]) + linalg.CPU.RepeatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=47, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=47, solved=0), )] (%9128:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=47)]) -> (%9130:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=47)]) + linalg.CPU.MatMulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=634, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=19, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=642, solved=0), )] (%9114:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=634)], %9129:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=19)]) -> (%9131:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=642)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, 
quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=642, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=643, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=642, solved=0), )] (%9131:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=642)], %9132:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=643), constant:[0.088388346]]) -> (%9133:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=642)]) + linalg.CPU.ReduceMinOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=642, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=644, solved=0), )] (%9133:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=642)]) -> (%9134:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=644)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, 
qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=644, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=645, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=644, solved=0), )] (%9134:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=644)], %9135:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=645), constant:[-20]]) -> (%9136:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=644)]) + linalg.CPU.EqualOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2, solved=0), inputs_1:QuantSpec(Raw(type: UInt16), uuid=646, solved=0), outputs_0:QuantSpec(Raw(type: UInt8), uuid=647, solved=0), )] (%8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %9137:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=646), constant:[0]]) -> (%9138:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=647)]) + linalg.CPU.WhereOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=647, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: 
Float32, zero_point_type: Int32), uuid=642, solved=0), inputs_2:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=644, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=644, solved=0), )] (%9138:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=647)], %9133:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=642)], %9136:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=644)]) -> (%9139:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=644)]) + linalg.CPU.SoftmaxOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=644, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=648, solved=0), )] (%9139:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=644)]) -> (%9140:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=648)]) + linalg.CPU.MatMulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, 
quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=648, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=47, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=649, solved=0), )] (%9140:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=648)], %9130:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=47)]) -> (%9141:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=649)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=649, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=649, solved=0), )] (%9141:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=649)]) -> (%9142:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=649)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, 
quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=649, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=649, solved=0), )] (%9142:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=649)]) -> (%9142:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=649)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=649, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=651, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=650, solved=0))] (%9142:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=649)]) -> (%9143:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=651)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), 
uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=651, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%9100:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %9143:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=651)]) -> (%9144:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=652, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=653, solved=0))] (%9144:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%9145:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=652)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, 
quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=652, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=655, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=654, solved=0))] (%9145:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=652)]) -> (%9146:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=655)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=652, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=657, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=656, solved=0))] (%9145:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=652)]) -> (%9147:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=657)]) + linalg.CPU.SigmoidOp [qnn_graph_name:model.0.s32, 
qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=657, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=658, solved=0), )] (%9147:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=657)]) -> (%9148:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=658)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=657, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=658, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=657, solved=0), )] (%9147:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=657)], %9148:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=658)]) -> (%9149:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=657)]) + linalg.CPU.MulOp 
[qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=657, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=655, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=657, solved=0), )] (%9149:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=657)], %9146:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=655)]) -> (%9150:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=657)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=657, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=660, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=659, solved=0))] (%9150:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=657)]) -> (%9151:tensor<[1, 32, 2048], 
UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=660)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=660, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%9144:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %9151:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=660)]) -> (%9152:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=661, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=662, solved=0))] (%9152:tensor<[1, 32, 2048], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%9153:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=661)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=661, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=664, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=663, solved=0))] (%9153:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=661)]) -> (%9154:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=664)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=661, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=666, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=665, solved=0))] (%9153:tensor<[1, 32, 2048], 
UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=661)]) -> (%9155:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=666)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=661, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=668, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=667, solved=0))] (%9153:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=661)]) -> (%9156:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=668)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=664, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=664, solved=0), )] (%9154:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), 
uuid=664)]) -> (%9154:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=664)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=664, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=664, solved=0), )] (%9154:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=664)]) -> (%9157:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=664)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=666, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=666, solved=0), )] (%9155:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=666)]) -> (%9155:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=666)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, 
quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=666, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=666, solved=0), )] (%9155:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=666)]) -> (%9158:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=666)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=668, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=668, solved=0), )] (%9156:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=668)]) -> (%9156:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=668)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=668, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=668, 
solved=0), )] (%9156:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=668)]) -> (%9159:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=668)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=664, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=669, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=670, solved=0))] (%9157:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=664)]) -> (%9160:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=669)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=666, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=671, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=672, solved=0))] 
(%9158:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=666)]) -> (%9161:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=671)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), )] (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), )] (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, 
quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=669, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=669, solved=0), )] (%9160:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=669)]) -> (%9160:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=669)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=669, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=669, solved=0), )] (%9160:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=669)]) -> (%9160:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=669)]) + linalg.CPU.NegOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=669, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=669, 
solved=0), )] (%9160:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=669)]) -> (%9162:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=669)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=669, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=669, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=669, solved=0), )] (%9162:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=669)], %9160:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=669)]) -> (%9163:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=669)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=669, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, 
solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=669, solved=0), )] (%9163:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=669)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%9164:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=669)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=669, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=669, solved=0), )] (%9160:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=669)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%9165:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=669)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, 
quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=669, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=669, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=669, solved=0), )] (%9165:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=669)], %9164:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=669)]) -> (%9166:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=669)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=671, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=671, solved=0), )] (%9161:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=671)]) -> (%9161:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=671)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, 
qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=671, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=671, solved=0), )] (%9161:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=671)]) -> (%9161:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=671)]) + linalg.CPU.NegOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=671, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=671, solved=0), )] (%9161:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=671)]) -> (%9167:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=671)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=671, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: 
Int32), uuid=671, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=671, solved=0), )] (%9167:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=671)], %9161:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=671)]) -> (%9168:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=671)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=671, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=671, solved=0), )] (%9168:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=671)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%9169:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=671)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, 
qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=671, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=671, solved=0), )] (%9161:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=671)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%9170:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=671)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=671, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=671, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=671, solved=0), )] (%9170:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=671)], %9169:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: 
UInt16, scale_type: Float32, zero_point_type: Int32), uuid=671)]) -> (%9171:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=671)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=671, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=673, solved=0), )] (%9171:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=671)]) -> (%9172:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=673)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=673, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=674, solved=0), )] (%9172:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=673)]) -> (%9173:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=674)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=674, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=674, solved=0), )] (%9173:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, 
scale_type: Float32), uuid=674)]) -> (%9175:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=674)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=668, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=675, solved=0), )] (%9159:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=668)]) -> (%9176:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=675)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=675, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=676, solved=0), )] (%9176:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=675)]) -> (%9177:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=676)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=20, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=674, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=20, solved=0), )] (%8242:tensor<[1, 8, 128, 992], 
Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=20)], %9175:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=674)]) -> (%9179:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=20)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=48, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=676, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=48, solved=0), )] (%8243:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=48)], %9177:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=676)]) -> (%9180:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=48)]) + linalg.CPU.RepeatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=20, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=20, solved=0), )] (%9179:tensor<[1, 8, 128, 
1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=20)]) -> (%9181:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=20)]) + linalg.CPU.RepeatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=48, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=48, solved=0), )] (%9180:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=48)]) -> (%9182:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=48)]) + linalg.CPU.MatMulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=669, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=20, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=677, solved=0), )] (%9166:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=669)], %9181:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=20)]) -> (%9183:tensor<[1, 
16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=677)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=677, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=678, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=677, solved=0), )] (%9183:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=677)], %9184:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=678), constant:[0.088388346]]) -> (%9185:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=677)]) + linalg.CPU.ReduceMinOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=677, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=679, solved=0), )] (%9185:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), 
uuid=677)]) -> (%9186:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=679)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=679, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=680, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=679, solved=0), )] (%9186:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=679)], %9187:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=680), constant:[-20]]) -> (%9188:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=679)]) + linalg.CPU.EqualOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2, solved=0), inputs_1:QuantSpec(Raw(type: UInt16), uuid=681, solved=0), outputs_0:QuantSpec(Raw(type: UInt8), uuid=682, solved=0), )] (%8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %9189:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=681), constant:[0]]) -> (%9190:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=682)]) 
+ linalg.CPU.WhereOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=682, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=677, solved=0), inputs_2:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=679, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=679, solved=0), )] (%9190:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=682)], %9185:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=677)], %9188:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=679)]) -> (%9191:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=679)]) + linalg.CPU.SoftmaxOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=679, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=683, solved=0), )] (%9191:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=679)]) -> (%9192:tensor<[1, 16, 
32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=683)]) + linalg.CPU.MatMulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=683, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=48, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=684, solved=0), )] (%9192:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=683)], %9182:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=48)]) -> (%9193:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=684)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=684, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=684, solved=0), )] (%9193:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=684)]) -> (%9194:tensor<[1, 32, 16, 128], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=684)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=684, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=684, solved=0), )] (%9194:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=684)]) -> (%9194:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=684)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=684, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=686, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=685, solved=0))] (%9194:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=684)]) -> (%9195:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=686)]) + 
linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=686, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%9152:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %9195:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=686)]) -> (%9196:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=687, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=688, solved=0))] (%9196:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%9197:tensor<[1, 32, 2048], 
UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=687)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=687, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=690, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=689, solved=0))] (%9197:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=687)]) -> (%9198:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=690)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=687, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=692, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=691, solved=0))] (%9197:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=687)]) -> 
(%9199:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=692)]) + linalg.CPU.SigmoidOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=692, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=693, solved=0), )] (%9199:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=692)]) -> (%9200:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=693)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=692, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=693, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=692, solved=0), )] (%9199:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=692)], %9200:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), 
uuid=693)]) -> (%9201:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=692)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=692, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=690, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=692, solved=0), )] (%9201:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=692)], %9198:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=690)]) -> (%9202:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=692)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=692, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=695, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=694, solved=0))] 
(%9202:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=692)]) -> (%9203:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=695)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=695, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%9196:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %9203:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=695)]) -> (%9204:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=696, solved=0), 
weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=697, solved=0))] (%9204:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%9205:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=696)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=696, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=699, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=698, solved=0))] (%9205:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=696)]) -> (%9206:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=699)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=696, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=701, solved=0), 
weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=700, solved=0))] (%9205:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=696)]) -> (%9207:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=701)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=696, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=703, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=702, solved=0))] (%9205:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=696)]) -> (%9208:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=703)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=699, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=699, 
solved=0), )] (%9206:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=699)]) -> (%9206:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=699)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=699, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=699, solved=0), )] (%9206:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=699)]) -> (%9209:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=699)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=701, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=701, solved=0), )] (%9207:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=701)]) -> (%9207:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 
65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=701)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=701, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=701, solved=0), )] (%9207:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=701)]) -> (%9210:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=701)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=703, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=703, solved=0), )] (%9208:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=703)]) -> (%9208:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=703)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), 
uuid=703, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=703, solved=0), )] (%9208:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=703)]) -> (%9211:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=703)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=699, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=704, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=705, solved=0))] (%9209:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=699)]) -> (%9212:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=704)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=701, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=706, 
solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=707, solved=0))] (%9210:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=701)]) -> (%9213:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=706)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), )] (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), )] (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, 
scale_type: Float32), uuid=63)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=704, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=704, solved=0), )] (%9212:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=704)]) -> (%9212:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=704)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=704, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=704, solved=0), )] (%9212:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=704)]) -> (%9212:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=704)]) + linalg.CPU.NegOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=704, solved=0), 
outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=704, solved=0), )] (%9212:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=704)]) -> (%9214:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=704)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=704, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=704, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=704, solved=0), )] (%9214:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=704)], %9212:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=704)]) -> (%9215:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=704)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), 
uuid=704, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=704, solved=0), )] (%9215:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=704)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%9216:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=704)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=704, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=704, solved=0), )] (%9212:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=704)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%9217:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=704)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=704, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=704, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=704, solved=0), )] (%9217:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=704)], %9216:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=704)]) -> (%9218:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=704)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=706, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=706, solved=0), )] (%9213:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=706)]) -> (%9213:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, 
scale_type: Float32, zero_point_type: Int32), uuid=706)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=706, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=706, solved=0), )] (%9213:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=706)]) -> (%9213:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=706)]) + linalg.CPU.NegOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=706, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=706, solved=0), )] (%9213:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=706)]) -> (%9219:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=706)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=706, solved=0), 
inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=706, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=706, solved=0), )] (%9219:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=706)], %9213:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=706)]) -> (%9220:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=706)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=706, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=706, solved=0), )] (%9220:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=706)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%9221:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, 
scale_type: Float32, zero_point_type: Int32), uuid=706)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=706, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=706, solved=0), )] (%9213:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=706)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%9222:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=706)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=706, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=706, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=706, solved=0), )] (%9222:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=706)], %9221:tensor<[1, 8, 32, 128], 
UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=706)]) -> (%9223:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=706)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=706, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=708, solved=0), )] (%9223:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=706)]) -> (%9224:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=708)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=708, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=709, solved=0), )] (%9224:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=708)]) -> (%9225:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=709)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=709, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=709, solved=0), )] (%9225:tensor<[1, 8, 32, 128], 
UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=709)]) -> (%9227:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=709)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=703, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=710, solved=0), )] (%9211:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=703)]) -> (%9228:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=710)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=710, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=711, solved=0), )] (%9228:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=710)]) -> (%9229:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=711)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=21, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=709, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, 
quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=21, solved=0), )] (%8244:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=21)], %9227:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=709)]) -> (%9231:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=21)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=49, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=711, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=49, solved=0), )] (%8245:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=49)], %9229:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=711)]) -> (%9232:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=49)]) + linalg.CPU.RepeatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=21, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 
-128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=21, solved=0), )] (%9231:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=21)]) -> (%9233:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=21)]) + linalg.CPU.RepeatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=49, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=49, solved=0), )] (%9232:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=49)]) -> (%9234:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=49)]) + linalg.CPU.MatMulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=704, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=21, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=712, solved=0), )] (%9218:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=704)], %9233:tensor<[1, 16, 128, 1024], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=21)]) -> (%9235:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=712)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=712, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=713, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=712, solved=0), )] (%9235:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=712)], %9236:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=713), constant:[0.088388346]]) -> (%9237:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=712)]) + linalg.CPU.ReduceMinOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=712, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=714, solved=0), )] (%9237:tensor<[1, 16, 32, 1024], 
UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=712)]) -> (%9238:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=714)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=714, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=715, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=714, solved=0), )] (%9238:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=714)], %9239:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=715), constant:[-20]]) -> (%9240:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=714)]) + linalg.CPU.EqualOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2, solved=0), inputs_1:QuantSpec(Raw(type: UInt16), uuid=716, solved=0), outputs_0:QuantSpec(Raw(type: UInt8), uuid=717, solved=0), )] (%8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %9241:tensor<[1], UInt16, 
CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=716), constant:[0]]) -> (%9242:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=717)]) + linalg.CPU.WhereOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=717, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=712, solved=0), inputs_2:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=714, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=714, solved=0), )] (%9242:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=717)], %9237:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=712)], %9240:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=714)]) -> (%9243:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=714)]) + linalg.CPU.SoftmaxOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=714, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=718, solved=0), )] (%9243:tensor<[1, 16, 32, 1024], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=714)]) -> (%9244:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=718)]) + linalg.CPU.MatMulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=718, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=49, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=719, solved=0), )] (%9244:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=718)], %9234:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=49)]) -> (%9245:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=719)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=719, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=719, solved=0), )] (%9245:tensor<[1, 16, 32, 128], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=719)]) -> (%9246:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=719)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=719, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=719, solved=0), )] (%9246:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=719)]) -> (%9246:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=719)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=719, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=721, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=720, solved=0))] (%9246:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=719)]) -> 
(%9247:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=721)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=721, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%9204:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %9247:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=721)]) -> (%9248:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=722, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=723, solved=0))] (%9248:tensor<[1, 32, 2048], 
UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%9249:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=722)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=722, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=725, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=724, solved=0))] (%9249:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=722)]) -> (%9250:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=725)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=722, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=727, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=726, solved=0))] (%9249:tensor<[1, 
32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=722)]) -> (%9251:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=727)]) + linalg.CPU.SigmoidOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=727, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=728, solved=0), )] (%9251:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=727)]) -> (%9252:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=728)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=727, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=728, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=727, solved=0), )] (%9251:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=727)], 
%9252:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=728)]) -> (%9253:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=727)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=727, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=725, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=727, solved=0), )] (%9253:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=727)], %9250:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=725)]) -> (%9254:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=727)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=727, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=730, 
solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=729, solved=0))] (%9254:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=727)]) -> (%9255:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=730)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=730, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%9248:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %9255:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=730)]) -> (%9256:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=731, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=732, solved=0))] (%9256:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%9257:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=731)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=731, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=734, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=733, solved=0))] (%9257:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=731)]) -> (%9258:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=734)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=731, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=736, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=735, solved=0))] (%9257:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=731)]) -> (%9259:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=736)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=731, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=738, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=737, solved=0))] (%9257:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=731)]) -> (%9260:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=738)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, 
scale_type: Float32, zero_point_type: Int32), uuid=734, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=734, solved=0), )] (%9258:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=734)]) -> (%9258:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=734)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=734, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=734, solved=0), )] (%9258:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=734)]) -> (%9261:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=734)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=736, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=736, solved=0), )] (%9259:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, 
quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=736)]) -> (%9259:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=736)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=736, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=736, solved=0), )] (%9259:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=736)]) -> (%9262:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=736)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=738, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=738, solved=0), )] (%9260:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=738)]) -> (%9260:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=738)]) + linalg.CPU.TransposeOp 
[qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=738, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=738, solved=0), )] (%9260:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=738)]) -> (%9263:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=738)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=734, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=739, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=740, solved=0))] (%9261:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=734)]) -> (%9264:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=739)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, 
scale_type: Float32, zero_point_type: Int32), uuid=736, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=741, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=742, solved=0))] (%9262:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=736)]) -> (%9265:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=741)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), )] (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), )] (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 
65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=739, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=739, solved=0), )] (%9264:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=739)]) -> (%9264:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=739)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=739, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=739, solved=0), )] (%9264:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=739)]) -> (%9264:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=739)]) + linalg.CPU.NegOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, 
quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=739, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=739, solved=0), )] (%9264:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=739)]) -> (%9266:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=739)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=739, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=739, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=739, solved=0), )] (%9266:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=739)], %9264:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=739)]) -> (%9267:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=739)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, 
qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=739, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=739, solved=0), )] (%9267:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=739)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%9268:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=739)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=739, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=739, solved=0), )] (%9264:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=739)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: 
Float32), uuid=65)]) -> (%9269:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=739)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=739, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=739, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=739, solved=0), )] (%9269:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=739)], %9268:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=739)]) -> (%9270:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=739)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=741, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=741, solved=0), )] (%9265:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: 
Float32, zero_point_type: Int32), uuid=741)]) -> (%9265:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=741)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=741, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=741, solved=0), )] (%9265:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=741)]) -> (%9265:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=741)]) + linalg.CPU.NegOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=741, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=741, solved=0), )] (%9265:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=741)]) -> (%9271:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=741)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, 
quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=741, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=741, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=741, solved=0), )] (%9271:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=741)], %9265:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=741)]) -> (%9272:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=741)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=741, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=741, solved=0), )] (%9272:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=741)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: 
UInt16, scale_type: Float32), uuid=63)]) -> (%9273:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=741)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=741, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=741, solved=0), )] (%9265:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=741)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%9274:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=741)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=741, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=741, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=741, solved=0), )] (%9274:tensor<[1, 8, 32, 128], 
UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=741)], %9273:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=741)]) -> (%9275:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=741)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=741, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=743, solved=0), )] (%9275:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=741)]) -> (%9276:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=743)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=743, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=744, solved=0), )] (%9276:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=743)]) -> (%9277:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=744)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: 
Float32), uuid=744, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=744, solved=0), )] (%9277:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=744)]) -> (%9279:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=744)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=738, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=745, solved=0), )] (%9263:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=738)]) -> (%9280:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=745)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=745, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=746, solved=0), )] (%9280:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=745)]) -> (%9281:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=746)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=22, 
solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=744, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=22, solved=0), )] (%8246:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=22)], %9279:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=744)]) -> (%9283:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=22)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=50, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=746, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=50, solved=0), )] (%8247:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=50)], %9281:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=746)]) -> (%9284:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=50)]) + linalg.CPU.RepeatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, 
quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=22, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=22, solved=0), )] (%9283:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=22)]) -> (%9285:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=22)]) + linalg.CPU.RepeatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=50, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=50, solved=0), )] (%9284:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=50)]) -> (%9286:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=50)]) + linalg.CPU.MatMulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=739, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=22, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=747, solved=0), )] (%9270:tensor<[1, 16, 32, 128], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=739)], %9285:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=22)]) -> (%9287:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=747)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=747, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=748, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=747, solved=0), )] (%9287:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=747)], %9288:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=748), constant:[0.088388346]]) -> (%9289:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=747)]) + linalg.CPU.ReduceMinOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=747, solved=0), 
outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=749, solved=0), )] (%9289:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=747)]) -> (%9290:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=749)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=749, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=750, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=749, solved=0), )] (%9290:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=749)], %9291:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=750), constant:[-20]]) -> (%9292:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=749)]) + linalg.CPU.EqualOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2, solved=0), inputs_1:QuantSpec(Raw(type: UInt16), uuid=751, solved=0), 
outputs_0:QuantSpec(Raw(type: UInt8), uuid=752, solved=0), )] (%8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %9293:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=751), constant:[0]]) -> (%9294:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=752)]) + linalg.CPU.WhereOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=752, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=747, solved=0), inputs_2:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=749, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=749, solved=0), )] (%9294:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=752)], %9289:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=747)], %9292:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=749)]) -> (%9295:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=749)]) + linalg.CPU.SoftmaxOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=749, solved=0), 
outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=753, solved=0), )] (%9295:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=749)]) -> (%9296:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=753)]) + linalg.CPU.MatMulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=753, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=50, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=754, solved=0), )] (%9296:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=753)], %9286:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=50)]) -> (%9297:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=754)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=754, solved=0), 
outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=754, solved=0), )] (%9297:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=754)]) -> (%9298:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=754)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=754, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=754, solved=0), )] (%9298:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=754)]) -> (%9298:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=754)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=754, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=756, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=755, solved=0))] 
(%9298:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=754)]) -> (%9299:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=756)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=756, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%9256:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %9299:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=756)]) -> (%9300:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=757, solved=0), 
weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=758, solved=0))] (%9300:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%9301:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=757)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=757, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=760, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=759, solved=0))] (%9301:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=757)]) -> (%9302:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=760)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=757, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=762, solved=0), 
weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=761, solved=0))] (%9301:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=757)]) -> (%9303:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=762)]) + linalg.CPU.SigmoidOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=762, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=763, solved=0), )] (%9303:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=762)]) -> (%9304:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=763)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=762, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=763, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=762, solved=0), )] 
(%9303:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=762)], %9304:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=763)]) -> (%9305:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=762)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=762, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=760, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=762, solved=0), )] (%9305:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=762)], %9302:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=760)]) -> (%9306:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=762)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: 
Float32, zero_point_type: Int32), uuid=762, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=765, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=764, solved=0))] (%9306:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=762)]) -> (%9307:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=765)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=765, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%9300:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %9307:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=765)]) -> (%9308:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + linalg.CPU.RMSNormOp 
[qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=766, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=767, solved=0))] (%9308:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%9309:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=766)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=766, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=769, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=768, solved=0))] (%9309:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=766)]) -> (%9310:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=769)]) + linalg.CPU.LinearOp 
[qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=766, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=771, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=770, solved=0))] (%9309:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=766)]) -> (%9311:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=771)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=766, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=773, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=772, solved=0))] (%9309:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=766)]) -> (%9312:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=773)]) + 
linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=769, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=769, solved=0), )] (%9310:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=769)]) -> (%9310:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=769)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=769, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=769, solved=0), )] (%9310:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=769)]) -> (%9313:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=769)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=771, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, 
quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=771, solved=0), )] (%9311:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=771)]) -> (%9311:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=771)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=771, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=771, solved=0), )] (%9311:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=771)]) -> (%9314:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=771)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=773, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=773, solved=0), )] (%9312:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=773)]) -> (%9312:tensor<[1, 32, 8, 128], 
UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=773)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=773, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=773, solved=0), )] (%9312:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=773)]) -> (%9315:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=773)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=769, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=774, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=775, solved=0))] (%9313:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=769)]) -> (%9316:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=774)]) + 
linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=771, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=776, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=777, solved=0))] (%9314:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=771)]) -> (%9317:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=776)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), )] (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, 
quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), )] (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=774, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=774, solved=0), )] (%9316:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=774)]) -> (%9316:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=774)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=774, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=774, solved=0), )] (%9316:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=774)]) -> (%9316:tensor<[1, 16, 32, 128], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=774)]) + linalg.CPU.NegOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=774, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=774, solved=0), )] (%9316:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=774)]) -> (%9318:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=774)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=774, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=774, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=774, solved=0), )] (%9318:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=774)], %9316:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=774)]) -> (%9319:tensor<[1, 16, 32, 
128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=774)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=774, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=774, solved=0), )] (%9319:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=774)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%9320:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=774)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=774, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=774, solved=0), )] (%9316:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: 
UInt16, scale_type: Float32, zero_point_type: Int32), uuid=774)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%9321:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=774)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=774, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=774, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=774, solved=0), )] (%9321:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=774)], %9320:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=774)]) -> (%9322:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=774)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=776, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: 
Float32, zero_point_type: Int32), uuid=776, solved=0), )] (%9317:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=776)]) -> (%9317:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=776)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=776, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=776, solved=0), )] (%9317:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=776)]) -> (%9317:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=776)]) + linalg.CPU.NegOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=776, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=776, solved=0), )] (%9317:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=776)]) -> (%9323:tensor<[1, 8, 32, 64], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=776)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=776, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=776, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=776, solved=0), )] (%9323:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=776)], %9317:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=776)]) -> (%9324:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=776)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=776, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=776, solved=0), )] (%9324:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, 
quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=776)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%9325:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=776)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=776, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=776, solved=0), )] (%9317:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=776)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%9326:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=776)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=776, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: 
Int32), uuid=776, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=776, solved=0), )] (%9326:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=776)], %9325:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=776)]) -> (%9327:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=776)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=776, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=778, solved=0), )] (%9327:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=776)]) -> (%9328:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=778)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=778, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=779, solved=0), )] (%9328:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=778)]) -> (%9329:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), 
uuid=779)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=779, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=779, solved=0), )] (%9329:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=779)]) -> (%9331:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=779)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=773, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=780, solved=0), )] (%9315:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=773)]) -> (%9332:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=780)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=780, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=781, solved=0), )] (%9332:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=780)]) -> (%9333:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=781)]) + 
linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=23, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=779, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=23, solved=0), )] (%8248:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=23)], %9331:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=779)]) -> (%9335:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=23)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=51, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=781, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=51, solved=0), )] (%8249:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=51)], %9333:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=781)]) -> (%9336:tensor<[1, 8, 1024, 128], 
UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=51)]) + linalg.CPU.RepeatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=23, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=23, solved=0), )] (%9335:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=23)]) -> (%9337:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=23)]) + linalg.CPU.RepeatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=51, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=51, solved=0), )] (%9336:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=51)]) -> (%9338:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=51)]) + linalg.CPU.MatMulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=774, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=23, solved=0), 
outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=782, solved=0), )] (%9322:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=774)], %9337:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=23)]) -> (%9339:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=782)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=782, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=783, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=782, solved=0), )] (%9339:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=782)], %9340:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=783), constant:[0.088388346]]) -> (%9341:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=782)]) + linalg.CPU.ReduceMinOp [qnn_graph_name:model.0.s32, 
qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=782, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=784, solved=0), )] (%9341:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=782)]) -> (%9342:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=784)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=784, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=785, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=784, solved=0), )] (%9342:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=784)], %9343:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=785), constant:[-20]]) -> (%9344:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=784)]) + linalg.CPU.EqualOp 
[qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2, solved=0), inputs_1:QuantSpec(Raw(type: UInt16), uuid=786, solved=0), outputs_0:QuantSpec(Raw(type: UInt8), uuid=787, solved=0), )] (%8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %9345:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=786), constant:[0]]) -> (%9346:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=787)]) + linalg.CPU.WhereOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=787, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=782, solved=0), inputs_2:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=784, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=784, solved=0), )] (%9346:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=787)], %9341:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=782)], %9344:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=784)]) -> (%9347:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=784)]) + linalg.CPU.SoftmaxOp [qnn_graph_name:model.0.s32, 
qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=784, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=788, solved=0), )] (%9347:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=784)]) -> (%9348:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=788)]) + linalg.CPU.MatMulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=788, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=51, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=789, solved=0), )] (%9348:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=788)], %9338:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=51)]) -> (%9349:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=789)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, 
qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=789, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=789, solved=0), )] (%9349:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=789)]) -> (%9350:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=789)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=789, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=789, solved=0), )] (%9350:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=789)]) -> (%9350:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=789)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=789, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: 
Int32), uuid=791, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=790, solved=0))] (%9350:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=789)]) -> (%9351:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=791)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=791, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%9308:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %9351:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=791)]) -> (%9352:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: 
Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=792, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=793, solved=0))] (%9352:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%9353:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=792)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=792, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=795, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=794, solved=0))] (%9353:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=792)]) -> (%9354:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=795)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=792, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=797, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=796, solved=0))] (%9353:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=792)]) -> (%9355:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=797)]) + linalg.CPU.SigmoidOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=797, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=798, solved=0), )] (%9355:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=797)]) -> (%9356:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=798)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=797, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=798, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=797, solved=0), )] (%9355:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=797)], %9356:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=798)]) -> (%9357:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=797)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=797, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=795, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=797, solved=0), )] (%9357:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=797)], %9354:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=795)]) -> (%9358:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=797)]) + 
linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=797, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=800, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=799, solved=0))] (%9358:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=797)]) -> (%9359:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=800)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=800, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%9352:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %9359:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=800)]) -> (%9360:tensor<[1, 
32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=801, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=802, solved=0))] (%9360:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%9361:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=801)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=801, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=804, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=803, solved=0))] (%9361:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=801)]) -> (%9362:tensor<[1, 32, 2048], 
UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=804)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=801, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=806, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=805, solved=0))] (%9361:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=801)]) -> (%9363:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=806)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=801, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=808, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=807, solved=0))] (%9361:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=801)]) -> 
(%9364:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=808)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=804, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=804, solved=0), )] (%9362:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=804)]) -> (%9362:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=804)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=804, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=804, solved=0), )] (%9362:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=804)]) -> (%9365:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=804)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, 
quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=806, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=806, solved=0), )] (%9363:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=806)]) -> (%9363:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=806)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=806, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=806, solved=0), )] (%9363:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=806)]) -> (%9366:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=806)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=808, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=808, 
solved=0), )] (%9364:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=808)]) -> (%9364:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=808)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=808, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=808, solved=0), )] (%9364:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=808)]) -> (%9367:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=808)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=804, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=809, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=810, solved=0))] (%9365:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: 
Float32, zero_point_type: Int32), uuid=804)]) -> (%9368:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=809)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=806, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=811, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=812, solved=0))] (%9366:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=806)]) -> (%9369:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=811)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), )] (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) + linalg.CPU.ViewOp 
[qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), )] (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=809, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=809, solved=0), )] (%9368:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=809)]) -> (%9368:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=809)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=809, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=809, solved=0), )] (%9368:tensor<[1, 16, 32, 128], 
UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=809)]) -> (%9368:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=809)]) + linalg.CPU.NegOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=809, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=809, solved=0), )] (%9368:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=809)]) -> (%9370:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=809)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=809, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=809, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=809, solved=0), )] (%9370:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=809)], 
%9368:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=809)]) -> (%9371:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=809)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=809, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=809, solved=0), )] (%9371:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=809)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%9372:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=809)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=809, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, 
quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=809, solved=0), )] (%9368:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=809)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%9373:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=809)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=809, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=809, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=809, solved=0), )] (%9373:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=809)], %9372:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=809)]) -> (%9374:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=809)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, 
quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=811, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=811, solved=0), )] (%9369:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=811)]) -> (%9369:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=811)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=811, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=811, solved=0), )] (%9369:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=811)]) -> (%9369:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=811)]) + linalg.CPU.NegOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=811, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=811, solved=0), 
)] (%9369:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=811)]) -> (%9375:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=811)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=811, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=811, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=811, solved=0), )] (%9375:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=811)], %9369:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=811)]) -> (%9376:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=811)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=811, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), 
outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=811, solved=0), )] (%9376:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=811)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%9377:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=811)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=811, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=811, solved=0), )] (%9369:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=811)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%9378:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=811)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, 
quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=811, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=811, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=811, solved=0), )] (%9378:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=811)], %9377:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=811)]) -> (%9379:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=811)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=811, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=813, solved=0), )] (%9379:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=811)]) -> (%9380:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=813)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=813, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, 
scale_type: Float32), uuid=814, solved=0), )] (%9380:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=813)]) -> (%9381:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=814)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=814, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=814, solved=0), )] (%9381:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=814)]) -> (%9383:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=814)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=808, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=815, solved=0), )] (%9367:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=808)]) -> (%9384:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=815)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=815, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=816, solved=0), 
)] (%9384:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=815)]) -> (%9385:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=816)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=24, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=814, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=24, solved=0), )] (%8250:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=24)], %9383:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=814)]) -> (%9387:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=24)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=52, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=816, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=52, solved=0), )] (%8251:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, 
quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=52)], %9385:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=816)]) -> (%9388:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=52)]) + linalg.CPU.RepeatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=24, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=24, solved=0), )] (%9387:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=24)]) -> (%9389:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=24)]) + linalg.CPU.RepeatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=52, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=52, solved=0), )] (%9388:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=52)]) -> (%9390:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=52)]) + linalg.CPU.MatMulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, 
quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=809, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=24, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=817, solved=0), )] (%9374:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=809)], %9389:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=24)]) -> (%9391:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=817)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=817, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=818, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=817, solved=0), )] (%9391:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=817)], %9392:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: 
Int32), uuid=818), constant:[0.088388346]]) -> (%9393:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=817)]) + linalg.CPU.ReduceMinOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=817, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=819, solved=0), )] (%9393:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=817)]) -> (%9394:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=819)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=819, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=820, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=819, solved=0), )] (%9394:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=819)], %9395:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: 
Float32, zero_point_type: Int32), uuid=820), constant:[-20]]) -> (%9396:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=819)]) + linalg.CPU.EqualOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2, solved=0), inputs_1:QuantSpec(Raw(type: UInt16), uuid=821, solved=0), outputs_0:QuantSpec(Raw(type: UInt8), uuid=822, solved=0), )] (%8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %9397:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=821), constant:[0]]) -> (%9398:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=822)]) + linalg.CPU.WhereOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=822, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=817, solved=0), inputs_2:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=819, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=819, solved=0), )] (%9398:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=822)], %9393:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=817)], %9396:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), 
uuid=819)]) -> (%9399:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=819)]) + linalg.CPU.SoftmaxOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=819, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=823, solved=0), )] (%9399:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=819)]) -> (%9400:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=823)]) + linalg.CPU.MatMulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=823, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=52, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=824, solved=0), )] (%9400:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=823)], %9390:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=52)]) -> 
(%9401:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=824)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=824, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=824, solved=0), )] (%9401:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=824)]) -> (%9402:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=824)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=824, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=824, solved=0), )] (%9402:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=824)]) -> (%9402:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=824)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, 
quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=824, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=826, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=825, solved=0))] (%9402:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=824)]) -> (%9403:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=826)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=826, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%9360:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %9403:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=826)]) -> (%9404:tensor<[1, 32, 2048], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=827, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=828, solved=0))] (%9404:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%9405:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=827)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=827, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=830, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=829, solved=0))] (%9405:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=827)]) -> (%9406:tensor<[1, 32, 6144], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=830)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=827, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=832, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=831, solved=0))] (%9405:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=827)]) -> (%9407:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=832)]) + linalg.CPU.SigmoidOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=832, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=833, solved=0), )] (%9407:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=832)]) -> (%9408:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=833)]) + 
linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=832, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=833, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=832, solved=0), )] (%9407:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=832)], %9408:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=833)]) -> (%9409:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=832)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=832, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=830, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=832, solved=0), )] (%9409:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=832)], %9406:tensor<[1, 32, 6144], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=830)]) -> (%9410:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=832)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=832, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=835, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=834, solved=0))] (%9410:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=832)]) -> (%9411:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=835)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=835, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%9404:tensor<[1, 32, 2048], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %9411:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=835)]) -> (%9412:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=836, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=837, solved=0))] (%9412:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%9413:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=836)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=836, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=839, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: 
-8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=838, solved=0))] (%9413:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=836)]) -> (%9414:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=839)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=836, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=841, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=840, solved=0))] (%9413:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=836)]) -> (%9415:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=841)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=836, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=843, solved=0), 
weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=842, solved=0))] (%9413:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=836)]) -> (%9416:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=843)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=839, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=839, solved=0), )] (%9414:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=839)]) -> (%9414:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=839)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=839, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=839, solved=0), )] (%9414:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: 
Float32, zero_point_type: Int32), uuid=839)]) -> (%9417:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=839)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=841, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=841, solved=0), )] (%9415:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=841)]) -> (%9415:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=841)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=841, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=841, solved=0), )] (%9415:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=841)]) -> (%9418:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=841)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, 
quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=843, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=843, solved=0), )] (%9416:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=843)]) -> (%9416:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=843)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=843, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=843, solved=0), )] (%9416:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=843)]) -> (%9419:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=843)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=839, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=844, 
solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=845, solved=0))] (%9417:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=839)]) -> (%9420:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=844)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=841, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=846, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=847, solved=0))] (%9418:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=841)]) -> (%9421:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=846)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), )] (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), )] (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=844, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=844, solved=0), )] (%9420:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=844)]) -> (%9420:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=844)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, 
quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=844, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=844, solved=0), )] (%9420:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=844)]) -> (%9420:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=844)]) + linalg.CPU.NegOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=844, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=844, solved=0), )] (%9420:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=844)]) -> (%9422:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=844)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=844, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=844, 
solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=844, solved=0), )] (%9422:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=844)], %9420:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=844)]) -> (%9423:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=844)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=844, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=844, solved=0), )] (%9423:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=844)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%9424:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=844)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, 
quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=844, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=844, solved=0), )] (%9420:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=844)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%9425:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=844)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=844, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=844, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=844, solved=0), )] (%9425:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=844)], %9424:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: 
Float32, zero_point_type: Int32), uuid=844)]) -> (%9426:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=844)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=846, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=846, solved=0), )] (%9421:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=846)]) -> (%9421:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=846)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=846, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=846, solved=0), )] (%9421:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=846)]) -> (%9421:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=846)]) + linalg.CPU.NegOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, 
quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=846, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=846, solved=0), )] (%9421:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=846)]) -> (%9427:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=846)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=846, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=846, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=846, solved=0), )] (%9427:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=846)], %9421:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=846)]) -> (%9428:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=846)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, 
qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=846, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=846, solved=0), )] (%9428:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=846)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%9429:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=846)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=846, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=846, solved=0), )] (%9421:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=846)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: 
Float32), uuid=65)]) -> (%9430:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=846)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=846, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=846, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=846, solved=0), )] (%9430:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=846)], %9429:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=846)]) -> (%9431:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=846)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=846, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=848, solved=0), )] (%9431:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=846)]) -> (%9432:tensor<[1, 8, 32, 128], Float16, 
CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=848)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=848, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=849, solved=0), )] (%9432:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=848)]) -> (%9433:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=849)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=849, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=849, solved=0), )] (%9433:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=849)]) -> (%9435:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=849)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=843, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=850, solved=0), )] (%9419:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=843)]) -> (%9436:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: 
Float16), uuid=850)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=850, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=851, solved=0), )] (%9436:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=850)]) -> (%9437:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=851)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=25, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=849, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=25, solved=0), )] (%8252:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=25)], %9435:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=849)]) -> (%9439:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=25)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=53, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, 
quant_to_type: UInt8, scale_type: Float32), uuid=851, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=53, solved=0), )] (%8253:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=53)], %9437:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=851)]) -> (%9440:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=53)]) + linalg.CPU.RepeatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=25, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=25, solved=0), )] (%9439:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=25)]) -> (%9441:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=25)]) + linalg.CPU.RepeatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=53, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=53, solved=0), )] (%9440:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, 
scale_type: Float32), uuid=53)]) -> (%9442:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=53)]) + linalg.CPU.MatMulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=844, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=25, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=852, solved=0), )] (%9426:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=844)], %9441:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=25)]) -> (%9443:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=852)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=852, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=853, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=852, solved=0), )] (%9443:tensor<[1, 16, 32, 1024], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=852)], %9444:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=853), constant:[0.088388346]]) -> (%9445:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=852)]) + linalg.CPU.ReduceMinOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=852, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=854, solved=0), )] (%9445:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=852)]) -> (%9446:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=854)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=854, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=855, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=854, solved=0), )] (%9446:tensor<[1, 16, 
32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=854)], %9447:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=855), constant:[-20]]) -> (%9448:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=854)]) + linalg.CPU.EqualOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2, solved=0), inputs_1:QuantSpec(Raw(type: UInt16), uuid=856, solved=0), outputs_0:QuantSpec(Raw(type: UInt8), uuid=857, solved=0), )] (%8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %9449:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=856), constant:[0]]) -> (%9450:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=857)]) + linalg.CPU.WhereOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=857, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=852, solved=0), inputs_2:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=854, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=854, solved=0), )] (%9450:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=857)], %9445:tensor<[1, 16, 32, 1024], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=852)], %9448:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=854)]) -> (%9451:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=854)]) + linalg.CPU.SoftmaxOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=854, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=858, solved=0), )] (%9451:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=854)]) -> (%9452:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=858)]) + linalg.CPU.MatMulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=858, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=53, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=859, solved=0), )] (%9452:tensor<[1, 16, 32, 1024], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=858)], %9442:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=53)]) -> (%9453:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=859)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=859, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=859, solved=0), )] (%9453:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=859)]) -> (%9454:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=859)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=859, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=859, solved=0), )] (%9454:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=859)]) -> 
(%9454:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=859)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=859, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=861, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=860, solved=0))] (%9454:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=859)]) -> (%9455:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=861)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=861, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%9412:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %9455:tensor<[1, 32, 
2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=861)]) -> (%9456:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=862, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=863, solved=0))] (%9456:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%9457:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=862)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=862, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=865, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=864, solved=0))] (%9457:tensor<[1, 32, 2048], 
UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=862)]) -> (%9458:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=865)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=862, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=867, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=866, solved=0))] (%9457:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=862)]) -> (%9459:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=867)]) + linalg.CPU.SigmoidOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=867, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=868, solved=0), )] (%9459:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), 
uuid=867)]) -> (%9460:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=868)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=867, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=868, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=867, solved=0), )] (%9459:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=867)], %9460:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=868)]) -> (%9461:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=867)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=867, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=865, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=867, solved=0), )] (%9461:tensor<[1, 32, 6144], 
UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=867)], %9458:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=865)]) -> (%9462:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=867)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=867, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=870, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=869, solved=0))] (%9462:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=867)]) -> (%9463:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=870)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=870, solved=0), 
outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%9456:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %9463:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=870)]) -> (%9464:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=871, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=872, solved=0))] (%9464:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%9465:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=871)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=871, 
solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=874, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=873, solved=0))] (%9465:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=871)]) -> (%9466:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=874)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=871, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=876, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=875, solved=0))] (%9465:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=871)]) -> (%9467:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=876)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: 
Int32), uuid=871, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=878, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=877, solved=0))] (%9465:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=871)]) -> (%9468:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=878)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=874, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=874, solved=0), )] (%9466:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=874)]) -> (%9466:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=874)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=874, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: 
Int32), uuid=874, solved=0), )] (%9466:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=874)]) -> (%9469:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=874)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=876, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=876, solved=0), )] (%9467:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=876)]) -> (%9467:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=876)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=876, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=876, solved=0), )] (%9467:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=876)]) -> (%9470:tensor<[1, 8, 32, 128], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=876)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=878, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=878, solved=0), )] (%9468:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=878)]) -> (%9468:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=878)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=878, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=878, solved=0), )] (%9468:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=878)]) -> (%9471:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=878)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, 
quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=874, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=879, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=880, solved=0))] (%9469:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=874)]) -> (%9472:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=879)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=876, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=881, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=882, solved=0))] (%9470:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=876)]) -> (%9473:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=881)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: 
UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), )] (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), )] (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=879, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=879, solved=0), )] (%9472:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=879)]) -> (%9472:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, 
quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=879)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=879, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=879, solved=0), )] (%9472:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=879)]) -> (%9472:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=879)]) + linalg.CPU.NegOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=879, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=879, solved=0), )] (%9472:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=879)]) -> (%9474:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=879)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: 
Int32), uuid=879, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=879, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=879, solved=0), )] (%9474:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=879)], %9472:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=879)]) -> (%9475:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=879)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=879, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=879, solved=0), )] (%9475:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=879)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%9476:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, 
quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=879)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=879, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=879, solved=0), )] (%9472:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=879)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%9477:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=879)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=879, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=879, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=879, solved=0), )] (%9477:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), 
uuid=879)], %9476:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=879)]) -> (%9478:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=879)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=881, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=881, solved=0), )] (%9473:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=881)]) -> (%9473:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=881)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=881, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=881, solved=0), )] (%9473:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=881)]) -> (%9473:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 
65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=881)]) + linalg.CPU.NegOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=881, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=881, solved=0), )] (%9473:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=881)]) -> (%9479:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=881)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=881, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=881, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=881, solved=0), )] (%9479:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=881)], %9473:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=881)]) -> (%9480:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 
0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=881)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=881, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=881, solved=0), )] (%9480:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=881)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%9481:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=881)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=881, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=881, solved=0), )] (%9473:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=881)], 
%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%9482:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=881)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=881, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=881, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=881, solved=0), )] (%9482:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=881)], %9481:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=881)]) -> (%9483:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=881)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=881, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=883, solved=0), )] (%9483:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, 
quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=881)]) -> (%9484:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=883)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=883, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=884, solved=0), )] (%9484:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=883)]) -> (%9485:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=884)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=884, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=884, solved=0), )] (%9485:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=884)]) -> (%9487:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=884)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=878, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=885, solved=0), )] (%9471:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, 
scale_type: Float32, zero_point_type: Int32), uuid=878)]) -> (%9488:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=885)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=885, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=886, solved=0), )] (%9488:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=885)]) -> (%9489:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=886)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=26, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=884, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=26, solved=0), )] (%8254:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=26)], %9487:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=884)]) -> (%9491:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=26)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 
255, quant_to_type: UInt8, scale_type: Float32), uuid=54, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=886, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=54, solved=0), )] (%8255:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=54)], %9489:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=886)]) -> (%9492:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=54)]) + linalg.CPU.RepeatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=26, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=26, solved=0), )] (%9491:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=26)]) -> (%9493:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=26)]) + linalg.CPU.RepeatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=54, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=54, solved=0), )] 
(%9492:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=54)]) -> (%9494:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=54)]) + linalg.CPU.MatMulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=879, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=26, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=887, solved=0), )] (%9478:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=879)], %9493:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=26)]) -> (%9495:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=887)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=887, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=888, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, 
scale_type: Float32, zero_point_type: Int32), uuid=887, solved=0), )] (%9495:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=887)], %9496:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=888), constant:[0.088388346]]) -> (%9497:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=887)]) + linalg.CPU.ReduceMinOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=887, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=889, solved=0), )] (%9497:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=887)]) -> (%9498:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=889)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=889, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=890, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 
65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=889, solved=0), )] (%9498:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=889)], %9499:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=890), constant:[-20]]) -> (%9500:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=889)]) + linalg.CPU.EqualOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2, solved=0), inputs_1:QuantSpec(Raw(type: UInt16), uuid=891, solved=0), outputs_0:QuantSpec(Raw(type: UInt8), uuid=892, solved=0), )] (%8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %9501:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=891), constant:[0]]) -> (%9502:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=892)]) + linalg.CPU.WhereOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=892, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=887, solved=0), inputs_2:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=889, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=889, solved=0), )] (%9502:tensor<[1, 1, 32, 1024], UInt8, 
CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=892)], %9497:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=887)], %9500:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=889)]) -> (%9503:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=889)]) + linalg.CPU.SoftmaxOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=889, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=893, solved=0), )] (%9503:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=889)]) -> (%9504:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=893)]) + linalg.CPU.MatMulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=893, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=54, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: 
Float32, zero_point_type: Int32), uuid=894, solved=0), )] (%9504:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=893)], %9494:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=54)]) -> (%9505:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=894)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=894, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=894, solved=0), )] (%9505:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=894)]) -> (%9506:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=894)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=894, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=894, solved=0), )] (%9506:tensor<[1, 32, 16, 128], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=894)]) -> (%9506:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=894)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=894, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=896, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=895, solved=0))] (%9506:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=894)]) -> (%9507:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=896)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=896, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%9464:tensor<[1, 32, 2048], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %9507:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=896)]) -> (%9508:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=897, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=898, solved=0))] (%9508:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%9509:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=897)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=897, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=900, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: 
-8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=899, solved=0))] (%9509:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=897)]) -> (%9510:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=900)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=897, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=902, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=901, solved=0))] (%9509:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=897)]) -> (%9511:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=902)]) + linalg.CPU.SigmoidOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=902, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=903, solved=0), )] (%9511:tensor<[1, 32, 
6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=902)]) -> (%9512:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=903)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=902, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=903, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=902, solved=0), )] (%9511:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=902)], %9512:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=903)]) -> (%9513:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=902)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=902, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=900, solved=0), 
outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=902, solved=0), )] (%9513:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=902)], %9510:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=900)]) -> (%9514:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=902)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=902, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=905, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=904, solved=0))] (%9514:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=902)]) -> (%9515:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=905)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=905, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%9508:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %9515:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=905)]) -> (%9516:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=906, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=907, solved=0))] (%9516:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%9517:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=906)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, 
qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=906, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=909, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=908, solved=0))] (%9517:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=906)]) -> (%9518:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=909)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=906, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=911, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=910, solved=0))] (%9517:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=906)]) -> (%9519:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=911)]) + linalg.CPU.LinearOp 
[qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=906, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=913, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=912, solved=0))] (%9517:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=906)]) -> (%9520:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=913)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=909, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=909, solved=0), )] (%9518:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=909)]) -> (%9518:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=909)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, 
quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=909, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=909, solved=0), )] (%9518:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=909)]) -> (%9521:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=909)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=911, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=911, solved=0), )] (%9519:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=911)]) -> (%9519:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=911)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=911, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=911, solved=0), )] (%9519:tensor<[1, 32, 8, 128], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=911)]) -> (%9522:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=911)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=913, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=913, solved=0), )] (%9520:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=913)]) -> (%9520:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=913)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=913, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=913, solved=0), )] (%9520:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=913)]) -> (%9523:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=913)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=909, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=914, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=915, solved=0))] (%9521:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=909)]) -> (%9524:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=914)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=911, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=916, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=917, solved=0))] (%9522:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=911)]) -> (%9525:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: 
Int32), uuid=916)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), )] (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), )] (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=914, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=914, solved=0), )] (%9524:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, 
quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=914)]) -> (%9524:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=914)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=914, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=914, solved=0), )] (%9524:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=914)]) -> (%9524:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=914)]) + linalg.CPU.NegOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=914, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=914, solved=0), )] (%9524:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=914)]) -> (%9526:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=914)]) + linalg.CPU.ConcatOp 
[qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=914, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=914, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=914, solved=0), )] (%9526:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=914)], %9524:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=914)]) -> (%9527:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=914)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=914, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=914, solved=0), )] (%9527:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=914)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%9528:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=914)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=914, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=914, solved=0), )] (%9524:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=914)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%9529:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=914)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=914, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=914, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=914, solved=0), )] (%9529:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=914)], %9528:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=914)]) -> (%9530:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=914)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=916, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=916, solved=0), )] (%9525:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=916)]) -> (%9525:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=916)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=916, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=916, solved=0), )] (%9525:tensor<[1, 8, 32, 128], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=916)]) -> (%9525:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=916)]) + linalg.CPU.NegOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=916, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=916, solved=0), )] (%9525:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=916)]) -> (%9531:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=916)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=916, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=916, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=916, solved=0), )] (%9531:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=916)], %9525:tensor<[1, 8, 32, 128], 
UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=916)]) -> (%9532:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=916)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=916, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=916, solved=0), )] (%9532:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=916)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%9533:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=916)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=916, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, 
scale_type: Float32, zero_point_type: Int32), uuid=916, solved=0), )] (%9525:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=916)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%9534:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=916)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=916, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=916, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=916, solved=0), )] (%9534:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=916)], %9533:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=916)]) -> (%9535:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=916)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, 
quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=916, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=918, solved=0), )] (%9535:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=916)]) -> (%9536:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=918)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=918, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=919, solved=0), )] (%9536:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=918)]) -> (%9537:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=919)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=919, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=919, solved=0), )] (%9537:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=919)]) -> (%9539:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=919)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, 
scale_type: Float32, zero_point_type: Int32), uuid=913, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=920, solved=0), )] (%9523:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=913)]) -> (%9540:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=920)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=920, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=921, solved=0), )] (%9540:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=920)]) -> (%9541:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=921)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=27, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=919, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=27, solved=0), )] (%8256:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=27)], %9539:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=919)]) -> (%9543:tensor<[1, 8, 128, 1024], Int8PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=27)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=55, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=921, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=55, solved=0), )] (%8257:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=55)], %9541:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=921)]) -> (%9544:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=55)]) + linalg.CPU.RepeatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=27, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=27, solved=0), )] (%9543:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=27)]) -> (%9545:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=27)]) + linalg.CPU.RepeatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, 
quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=55, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=55, solved=0), )] (%9544:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=55)]) -> (%9546:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=55)]) + linalg.CPU.MatMulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=914, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=27, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=922, solved=0), )] (%9530:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=914)], %9545:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=27)]) -> (%9547:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=922)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=922, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=923, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=922, solved=0), )] (%9547:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=922)], %9548:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=923), constant:[0.088388346]]) -> (%9549:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=922)]) + linalg.CPU.ReduceMinOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=922, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=924, solved=0), )] (%9549:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=922)]) -> (%9550:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=924)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: 
UInt16, scale_type: Float32, zero_point_type: Int32), uuid=924, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=925, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=924, solved=0), )] (%9550:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=924)], %9551:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=925), constant:[-20]]) -> (%9552:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=924)]) + linalg.CPU.EqualOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2, solved=0), inputs_1:QuantSpec(Raw(type: UInt16), uuid=926, solved=0), outputs_0:QuantSpec(Raw(type: UInt8), uuid=927, solved=0), )] (%8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %9553:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=926), constant:[0]]) -> (%9554:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=927)]) + linalg.CPU.WhereOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=927, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=922, solved=0), inputs_2:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: 
UInt16, scale_type: Float32, zero_point_type: Int32), uuid=924, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=924, solved=0), )] (%9554:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=927)], %9549:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=922)], %9552:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=924)]) -> (%9555:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=924)]) + linalg.CPU.SoftmaxOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=924, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=928, solved=0), )] (%9555:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=924)]) -> (%9556:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=928)]) + linalg.CPU.MatMulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=928, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=55, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=929, solved=0), )] (%9556:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=928)], %9546:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=55)]) -> (%9557:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=929)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=929, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=929, solved=0), )] (%9557:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=929)]) -> (%9558:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=929)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), 
uuid=929, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=929, solved=0), )] (%9558:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=929)]) -> (%9558:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=929)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=929, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=931, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=930, solved=0))] (%9558:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=929)]) -> (%9559:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=931)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=931, 
solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%9516:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %9559:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=931)]) -> (%9560:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=932, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=933, solved=0))] (%9560:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%9561:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=932)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), 
uuid=932, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=935, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=934, solved=0))] (%9561:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=932)]) -> (%9562:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=935)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=932, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=937, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=936, solved=0))] (%9561:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=932)]) -> (%9563:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=937)]) + linalg.CPU.SigmoidOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=937, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=938, solved=0), )] (%9563:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=937)]) -> (%9564:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=938)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=937, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=938, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=937, solved=0), )] (%9563:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=937)], %9564:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=938)]) -> (%9565:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=937)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: 
Float32, zero_point_type: Int32), uuid=937, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=935, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=937, solved=0), )] (%9565:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=937)], %9562:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=935)]) -> (%9566:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=937)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=937, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=940, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=939, solved=0))] (%9566:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=937)]) -> (%9567:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=940)]) + linalg.CPU.AddOp 
[qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=940, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%9560:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %9567:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=940)]) -> (%9568:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=941, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=942, solved=0))] (%9568:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%9569:tensor<[1, 32, 2048], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=941)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=941, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=944, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=943, solved=0))] (%9569:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=941)]) -> (%9570:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=944)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=941, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=946, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=945, solved=0))] (%9569:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=941)]) -> (%9571:tensor<[1, 32, 1024], 
UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=946)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=941, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=948, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=947, solved=0))] (%9569:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=941)]) -> (%9572:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=948)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=944, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=944, solved=0), )] (%9570:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=944)]) -> (%9570:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), 
uuid=944)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=944, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=944, solved=0), )] (%9570:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=944)]) -> (%9573:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=944)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=946, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=946, solved=0), )] (%9571:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=946)]) -> (%9571:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=946)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=946, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 
65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=946, solved=0), )] (%9571:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=946)]) -> (%9574:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=946)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=948, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=948, solved=0), )] (%9572:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=948)]) -> (%9572:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=948)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=948, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=948, solved=0), )] (%9572:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=948)]) -> (%9575:tensor<[1, 8, 32, 128], 
UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=948)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=944, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=949, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=950, solved=0))] (%9573:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=944)]) -> (%9576:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=949)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=946, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=951, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=952, solved=0))] (%9574:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=946)]) -> (%9577:tensor<[1, 8, 32, 128], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=951)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), )] (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), )] (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=949, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: 
Int32), uuid=949, solved=0), )] (%9576:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=949)]) -> (%9576:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=949)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=949, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=949, solved=0), )] (%9576:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=949)]) -> (%9576:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=949)]) + linalg.CPU.NegOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=949, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=949, solved=0), )] (%9576:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=949)]) -> (%9578:tensor<[1, 16, 32, 64], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=949)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=949, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=949, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=949, solved=0), )] (%9578:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=949)], %9576:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=949)]) -> (%9579:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=949)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=949, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=949, solved=0), )] (%9579:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, 
quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=949)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%9580:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=949)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=949, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=949, solved=0), )] (%9576:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=949)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%9581:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=949)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=949, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, 
zero_point_type: Int32), uuid=949, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=949, solved=0), )] (%9581:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=949)], %9580:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=949)]) -> (%9582:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=949)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=951, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=951, solved=0), )] (%9577:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=951)]) -> (%9577:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=951)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=951, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, 
scale_type: Float32, zero_point_type: Int32), uuid=951, solved=0), )] (%9577:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=951)]) -> (%9577:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=951)]) + linalg.CPU.NegOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=951, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=951, solved=0), )] (%9577:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=951)]) -> (%9583:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=951)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=951, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=951, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=951, solved=0), )] (%9583:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 
65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=951)], %9577:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=951)]) -> (%9584:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=951)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=951, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=951, solved=0), )] (%9584:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=951)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%9585:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=951)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=951, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), 
uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=951, solved=0), )] (%9577:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=951)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%9586:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=951)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=951, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=951, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=951, solved=0), )] (%9586:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=951)], %9585:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=951)]) -> (%9587:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=951)]) + linalg.CPU.CastTypeOp 
[qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=951, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=953, solved=0), )] (%9587:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=951)]) -> (%9588:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=953)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=953, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=954, solved=0), )] (%9588:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=953)]) -> (%9589:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=954)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=954, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=954, solved=0), )] (%9589:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=954)]) -> (%9591:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=954)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, 
qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=948, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=955, solved=0), )] (%9575:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=948)]) -> (%9592:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=955)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=955, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=956, solved=0), )] (%9592:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=955)]) -> (%9593:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=956)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=28, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=954, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=28, solved=0), )] (%8258:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=28)], %9591:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 
0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=954)]) -> (%9595:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=28)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=56, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=956, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=56, solved=0), )] (%8259:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=56)], %9593:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=956)]) -> (%9596:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=56)]) + linalg.CPU.RepeatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=28, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=28, solved=0), )] (%9595:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=28)]) -> (%9597:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, 
quant_to_type: Int8, scale_type: Float32), uuid=28)]) + linalg.CPU.RepeatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=56, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=56, solved=0), )] (%9596:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=56)]) -> (%9598:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=56)]) + linalg.CPU.MatMulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=949, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=28, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=957, solved=0), )] (%9582:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=949)], %9597:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=28)]) -> (%9599:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=957)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, 
quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=957, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=958, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=957, solved=0), )] (%9599:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=957)], %9600:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=958), constant:[0.088388346]]) -> (%9601:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=957)]) + linalg.CPU.ReduceMinOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=957, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=959, solved=0), )] (%9601:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=957)]) -> (%9602:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=959)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, 
qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=959, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=960, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=959, solved=0), )] (%9602:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=959)], %9603:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=960), constant:[-20]]) -> (%9604:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=959)]) + linalg.CPU.EqualOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2, solved=0), inputs_1:QuantSpec(Raw(type: UInt16), uuid=961, solved=0), outputs_0:QuantSpec(Raw(type: UInt8), uuid=962, solved=0), )] (%8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %9605:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=961), constant:[0]]) -> (%9606:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=962)]) + linalg.CPU.WhereOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=962, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: 
Float32, zero_point_type: Int32), uuid=957, solved=0), inputs_2:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=959, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=959, solved=0), )] (%9606:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=962)], %9601:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=957)], %9604:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=959)]) -> (%9607:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=959)]) + linalg.CPU.SoftmaxOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=959, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=963, solved=0), )] (%9607:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=959)]) -> (%9608:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=963)]) + linalg.CPU.MatMulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, 
quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=963, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=56, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=964, solved=0), )] (%9608:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=963)], %9598:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=56)]) -> (%9609:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=964)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=964, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=964, solved=0), )] (%9609:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=964)]) -> (%9610:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=964)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, 
quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=964, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=964, solved=0), )] (%9610:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=964)]) -> (%9610:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=964)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=964, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=966, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=965, solved=0))] (%9610:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=964)]) -> (%9611:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=966)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), 
uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=966, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%9568:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %9611:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=966)]) -> (%9612:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=967, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=968, solved=0))] (%9612:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%9613:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=967)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, 
quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=967, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=970, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=969, solved=0))] (%9613:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=967)]) -> (%9614:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=970)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=967, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=972, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=971, solved=0))] (%9613:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=967)]) -> (%9615:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=972)]) + linalg.CPU.SigmoidOp [qnn_graph_name:model.0.s32, 
qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=972, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=973, solved=0), )] (%9615:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=972)]) -> (%9616:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=973)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=972, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=973, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=972, solved=0), )] (%9615:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=972)], %9616:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=973)]) -> (%9617:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=972)]) + linalg.CPU.MulOp 
[qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=972, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=970, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=972, solved=0), )] (%9617:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=972)], %9614:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=970)]) -> (%9618:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=972)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=972, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=975, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=974, solved=0))] (%9618:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=972)]) -> (%9619:tensor<[1, 32, 2048], 
UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=975)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=975, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%9612:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %9619:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=975)]) -> (%9620:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=976, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=977, solved=0))] (%9620:tensor<[1, 32, 2048], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%9621:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=976)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=976, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=979, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=978, solved=0))] (%9621:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=976)]) -> (%9622:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=979)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=976, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=981, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=980, solved=0))] (%9621:tensor<[1, 32, 2048], 
UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=976)]) -> (%9623:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=981)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=976, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=983, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=982, solved=0))] (%9621:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=976)]) -> (%9624:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=983)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=979, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=979, solved=0), )] (%9622:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), 
uuid=979)]) -> (%9622:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=979)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=979, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=979, solved=0), )] (%9622:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=979)]) -> (%9625:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=979)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=981, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=981, solved=0), )] (%9623:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=981)]) -> (%9623:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=981)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, 
quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=981, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=981, solved=0), )] (%9623:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=981)]) -> (%9626:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=981)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=983, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=983, solved=0), )] (%9624:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=983)]) -> (%9624:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=983)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=983, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=983, 
solved=0), )] (%9624:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=983)]) -> (%9627:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=983)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=979, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=984, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=985, solved=0))] (%9625:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=979)]) -> (%9628:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=984)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=981, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=986, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=987, solved=0))] 
(%9626:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=981)]) -> (%9629:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=986)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), )] (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), )] (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, 
quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=984, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=984, solved=0), )] (%9628:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=984)]) -> (%9628:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=984)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=984, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=984, solved=0), )] (%9628:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=984)]) -> (%9628:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=984)]) + linalg.CPU.NegOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=984, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=984, 
solved=0), )] (%9628:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=984)]) -> (%9630:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=984)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=984, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=984, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=984, solved=0), )] (%9630:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=984)], %9628:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=984)]) -> (%9631:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=984)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=984, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, 
solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=984, solved=0), )] (%9631:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=984)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%9632:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=984)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=984, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=984, solved=0), )] (%9628:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=984)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%9633:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=984)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, 
quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=984, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=984, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=984, solved=0), )] (%9633:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=984)], %9632:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=984)]) -> (%9634:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=984)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=986, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=986, solved=0), )] (%9629:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=986)]) -> (%9629:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=986)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, 
qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=986, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=986, solved=0), )] (%9629:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=986)]) -> (%9629:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=986)]) + linalg.CPU.NegOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=986, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=986, solved=0), )] (%9629:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=986)]) -> (%9635:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=986)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=986, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: 
Int32), uuid=986, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=986, solved=0), )] (%9635:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=986)], %9629:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=986)]) -> (%9636:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=986)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=986, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=986, solved=0), )] (%9636:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=986)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%9637:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=986)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, 
qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=986, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=986, solved=0), )] (%9629:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=986)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%9638:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=986)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=986, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=986, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=986, solved=0), )] (%9638:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=986)], %9637:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: 
UInt16, scale_type: Float32, zero_point_type: Int32), uuid=986)]) -> (%9639:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=986)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=986, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=988, solved=0), )] (%9639:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=986)]) -> (%9640:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=988)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=988, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=989, solved=0), )] (%9640:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=988)]) -> (%9641:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=989)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=989, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=989, solved=0), )] (%9641:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, 
scale_type: Float32), uuid=989)]) -> (%9643:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=989)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=983, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=990, solved=0), )] (%9627:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=983)]) -> (%9644:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=990)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=990, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=991, solved=0), )] (%9644:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=990)]) -> (%9645:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=991)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=29, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=989, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=29, solved=0), )] (%8260:tensor<[1, 8, 128, 992], 
Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=29)], %9643:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=989)]) -> (%9647:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=29)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=57, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=991, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=57, solved=0), )] (%8261:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=57)], %9645:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=991)]) -> (%9648:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=57)]) + linalg.CPU.RepeatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=29, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=29, solved=0), )] (%9647:tensor<[1, 8, 128, 
1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=29)]) -> (%9649:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=29)]) + linalg.CPU.RepeatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=57, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=57, solved=0), )] (%9648:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=57)]) -> (%9650:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=57)]) + linalg.CPU.MatMulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=984, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=29, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=992, solved=0), )] (%9634:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=984)], %9649:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=29)]) -> (%9651:tensor<[1, 
16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=992)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=992, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=993, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=992, solved=0), )] (%9651:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=992)], %9652:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=993), constant:[0.088388346]]) -> (%9653:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=992)]) + linalg.CPU.ReduceMinOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=992, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=994, solved=0), )] (%9653:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), 
uuid=992)]) -> (%9654:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=994)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=994, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=995, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=994, solved=0), )] (%9654:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=994)], %9655:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=995), constant:[-20]]) -> (%9656:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=994)]) + linalg.CPU.EqualOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2, solved=0), inputs_1:QuantSpec(Raw(type: UInt16), uuid=996, solved=0), outputs_0:QuantSpec(Raw(type: UInt8), uuid=997, solved=0), )] (%8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %9657:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=996), constant:[0]]) -> (%9658:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=997)]) 
+ linalg.CPU.WhereOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=997, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=992, solved=0), inputs_2:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=994, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=994, solved=0), )] (%9658:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=997)], %9653:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=992)], %9656:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=994)]) -> (%9659:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=994)]) + linalg.CPU.SoftmaxOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=994, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=998, solved=0), )] (%9659:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=994)]) -> (%9660:tensor<[1, 16, 
32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=998)]) + linalg.CPU.MatMulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=998, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=57, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=999, solved=0), )] (%9660:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=998)], %9650:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=57)]) -> (%9661:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=999)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=999, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=999, solved=0), )] (%9661:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=999)]) -> (%9662:tensor<[1, 32, 16, 128], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=999)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=999, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=999, solved=0), )] (%9662:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=999)]) -> (%9662:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=999)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=999, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1001, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1000, solved=0))] (%9662:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=999)]) -> (%9663:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1001)]) + 
linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1001, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%9620:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %9663:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1001)]) -> (%9664:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1002, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1003, solved=0))] (%9664:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%9665:tensor<[1, 32, 2048], 
UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1002)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1002, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1005, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1004, solved=0))] (%9665:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1002)]) -> (%9666:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1005)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1002, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1007, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1006, solved=0))] (%9665:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1002)]) -> 
(%9667:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1007)]) + linalg.CPU.SigmoidOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1007, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1008, solved=0), )] (%9667:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1007)]) -> (%9668:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1008)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1007, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1008, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1007, solved=0), )] (%9667:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1007)], %9668:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: 
Int32), uuid=1008)]) -> (%9669:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1007)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1007, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1005, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1007, solved=0), )] (%9669:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1007)], %9666:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1005)]) -> (%9670:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1007)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1007, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1010, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), 
uuid=1009, solved=0))] (%9670:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1007)]) -> (%9671:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1010)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1010, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%9664:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %9671:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1010)]) -> (%9672:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: 
Int32), uuid=1011, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1012, solved=0))] (%9672:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%9673:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1011)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1011, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1014, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1013, solved=0))] (%9673:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1011)]) -> (%9674:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1014)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1011, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: 
Int32), uuid=1016, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1015, solved=0))] (%9673:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1011)]) -> (%9675:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1016)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1011, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1018, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1017, solved=0))] (%9673:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1011)]) -> (%9676:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1018)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1014, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: 
Float32, zero_point_type: Int32), uuid=1014, solved=0), )] (%9674:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1014)]) -> (%9674:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1014)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1014, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1014, solved=0), )] (%9674:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1014)]) -> (%9677:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1014)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1016, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1016, solved=0), )] (%9675:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1016)]) -> (%9675:tensor<[1, 32, 8, 128], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1016)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1016, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1016, solved=0), )] (%9675:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1016)]) -> (%9678:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1016)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1018, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1018, solved=0), )] (%9676:tensor<[1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1018)]) -> (%9676:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1018)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 
65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1018, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1018, solved=0), )] (%9676:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1018)]) -> (%9679:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1018)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1014, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1019, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1020, solved=0))] (%9677:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1014)]) -> (%9680:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1019)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1016, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, 
quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1021, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1022, solved=0))] (%9678:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1016)]) -> (%9681:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1021)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), )] (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), )] (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%8267:tensor<[1, 1, 32, 128], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1019, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1019, solved=0), )] (%9680:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1019)]) -> (%9680:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1019)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1019, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1019, solved=0), )] (%9680:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1019)]) -> (%9680:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1019)]) + linalg.CPU.NegOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, 
scale_type: Float32, zero_point_type: Int32), uuid=1019, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1019, solved=0), )] (%9680:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1019)]) -> (%9682:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1019)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1019, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1019, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1019, solved=0), )] (%9682:tensor<[1, 16, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1019)], %9680:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1019)]) -> (%9683:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1019)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 
65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1019, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1019, solved=0), )] (%9683:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1019)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%9684:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1019)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1019, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1019, solved=0), )] (%9680:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1019)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%9685:tensor<[1, 16, 32, 128], UInt16PerTensor, 
CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1019)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1019, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1019, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1019, solved=0), )] (%9685:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1019)], %9684:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1019)]) -> (%9686:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1019)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1021, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1021, solved=0), )] (%9681:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1021)]) -> (%9681:tensor<[1, 
8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1021)]) + linalg.CPU.SliceOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1021, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1021, solved=0), )] (%9681:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1021)]) -> (%9681:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1021)]) + linalg.CPU.NegOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1021, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1021, solved=0), )] (%9681:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1021)]) -> (%9687:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1021)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, 
quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1021, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1021, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1021, solved=0), )] (%9687:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1021)], %9681:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1021)]) -> (%9688:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1021)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1021, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=63, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1021, solved=0), )] (%9688:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1021)], %8267:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, 
quant_to_type: UInt16, scale_type: Float32), uuid=63)]) -> (%9689:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1021)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1021, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1021, solved=0), )] (%9681:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1021)], %8268:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32), uuid=65)]) -> (%9690:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1021)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1021, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1021, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1021, solved=0), )] (%9690:tensor<[1, 8, 
32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1021)], %9689:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1021)]) -> (%9691:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1021)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1021, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=1023, solved=0), )] (%9691:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1021)]) -> (%9692:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=1023)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=1023, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1024, solved=0), )] (%9692:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=1023)]) -> (%9693:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1024)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, 
quant_to_type: UInt8, scale_type: Float32), uuid=1024, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1024, solved=0), )] (%9693:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1024)]) -> (%9695:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1024)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1018, solved=0), outputs_0:QuantSpec(Raw(type: Float16), uuid=1025, solved=0), )] (%9679:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1018)]) -> (%9696:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=1025)]) + linalg.CPU.CastTypeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float16), uuid=1025, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1026, solved=0), )] (%9696:tensor<[1, 8, 32, 128], Float16, CPU>[quant_recipe:QuantSpec(Raw(type: Float16), uuid=1025)]) -> (%9697:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1026)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, 
quant_to_type: Int8, scale_type: Float32), uuid=30, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1024, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=30, solved=0), )] (%8262:tensor<[1, 8, 128, 992], Int8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=30)], %9695:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1024)]) -> (%9699:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=30)]) + linalg.CPU.ConcatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=58, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1026, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=58, solved=0), )] (%8263:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=58)], %9697:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1026)]) -> (%9700:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=58)]) + linalg.CPU.RepeatOp 
[qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=30, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=30, solved=0), )] (%9699:tensor<[1, 8, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=30)]) -> (%9701:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=30)]) + linalg.CPU.RepeatOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=58, solved=0), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=58, solved=0), )] (%9700:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=58)]) -> (%9702:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=58)]) + linalg.CPU.MatMulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1019, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=30, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1027, solved=0), )] (%9686:tensor<[1, 16, 32, 
128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1019)], %9701:tensor<[1, 16, 128, 1024], Int8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: -128, quant_max: 127, quant_to_type: Int8, scale_type: Float32), uuid=30)]) -> (%9703:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1027)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1027, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1028, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1027, solved=0), )] (%9703:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1027)], %9704:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1028), constant:[0.088388346]]) -> (%9705:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1027)]) + linalg.CPU.ReduceMinOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: 
Int32), uuid=1027, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1029, solved=0), )] (%9705:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1027)]) -> (%9706:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1029)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1029, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1030, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1029, solved=0), )] (%9706:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1029)], %9707:tensor<[1], Float32, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1030), constant:[-20]]) -> (%9708:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1029)]) + linalg.CPU.EqualOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2, solved=0), inputs_1:QuantSpec(Raw(type: UInt16), uuid=1031, 
solved=0), outputs_0:QuantSpec(Raw(type: UInt8), uuid=1032, solved=0), )] (%8207:tensor<[1, 1, 32, 1024], UInt16, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2)], %9709:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=1031), constant:[0]]) -> (%9710:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=1032)]) + linalg.CPU.WhereOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=1032, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1027, solved=0), inputs_2:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1029, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1029, solved=0), )] (%9710:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=1032)], %9705:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1027)], %9708:tensor<[1, 16, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1029)]) -> (%9711:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1029)]) + linalg.CPU.SoftmaxOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), 
uuid=1029, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1033, solved=0), )] (%9711:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1029)]) -> (%9712:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1033)]) + linalg.CPU.MatMulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1033, solved=0), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=58, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1034, solved=0), )] (%9712:tensor<[1, 16, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1033)], %9702:tensor<[1, 16, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=58)]) -> (%9713:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1034)]) + linalg.CPU.TransposeOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1034, 
solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1034, solved=0), )] (%9713:tensor<[1, 16, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1034)]) -> (%9714:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1034)]) + linalg.CPU.ViewOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1034, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1034, solved=0), )] (%9714:tensor<[1, 32, 16, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1034)]) -> (%9714:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1034)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1034, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1036, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), 
uuid=1035, solved=0))] (%9714:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1034)]) -> (%9715:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1036)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1036, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%9672:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %9715:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1036)]) -> (%9716:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: 
Int32), uuid=1037, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1038, solved=0))] (%9716:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%9717:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1037)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1037, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1040, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1039, solved=0))] (%9717:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1037)]) -> (%9718:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1040)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1037, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: 
Int32), uuid=1042, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1041, solved=0))] (%9717:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1037)]) -> (%9719:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1042)]) + linalg.CPU.SigmoidOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1042, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1043, solved=0), )] (%9719:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1042)]) -> (%9720:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1043)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1042, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1043, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: 
Int32), uuid=1042, solved=0), )] (%9719:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1042)], %9720:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1043)]) -> (%9721:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1042)]) + linalg.CPU.MulOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1042, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1040, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1042, solved=0), )] (%9721:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1042)], %9718:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1040)]) -> (%9722:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1042)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, 
quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1042, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1045, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1044, solved=0))] (%9722:tensor<[1, 32, 6144], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1042)]) -> (%9723:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1045)]) + linalg.CPU.AddOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1045, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), )] (%9716:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)], %9723:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1045)]) -> (%9724:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: 
Int32), uuid=60)]) + linalg.CPU.RMSNormOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1046, solved=0), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1047, solved=0))] (%9724:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=60)]) -> (%9725:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1046)]) + linalg.CPU.LinearOp [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1046, solved=0), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1049, solved=0), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: -1, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1048, solved=0)), using_qnn:true] (%9725:tensor<[1, 32, 2048], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1046)]) -> (%9726:tensor<[1, 32, 151936], UInt16PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, 
scale_type: Float32, zero_point_type: Int32), uuid=1049)]) + cf.ReturnOp (%9726:tensor<[1, 32, 151936], UInt16PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65536, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1049)], %8291:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=79)], %8343:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=114)], %8395:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=149)], %8447:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=184)], %8499:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=219)], %8551:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=254)], %8603:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=289)], %8655:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=324)], %8707:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 
0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=359)], %8759:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=394)], %8811:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=429)], %8863:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=464)], %8915:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=499)], %8967:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=534)], %9019:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=569)], %9071:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=604)], %9123:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=639)], %9175:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=674)], %9227:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, 
quant_to_type: UInt8, scale_type: Float32), uuid=709)], %9279:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=744)], %9331:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=779)], %9383:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=814)], %9435:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=849)], %9487:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=884)], %9539:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=919)], %9591:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=954)], %9643:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=989)], %9695:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1024)], %8293:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, 
scale_type: Float32), uuid=81)], %8345:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=116)], %8397:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=151)], %8449:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=186)], %8501:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=221)], %8553:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=256)], %8605:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=291)], %8657:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=326)], %8709:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=361)], %8761:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=396)], %8813:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), 
uuid=431)], %8865:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=466)], %8917:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=501)], %8969:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=536)], %9021:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=571)], %9073:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=606)], %9125:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=641)], %9177:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=676)], %9229:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=711)], %9281:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=746)], %9333:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=781)], %9385:tensor<[1, 
8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=816)], %9437:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=851)], %9489:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=886)], %9541:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=921)], %9593:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=956)], %9645:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=991)], %9697:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1026)]) -> () + } + } } diff --git a/mllm/backends/qnn/QNNUtils.hpp b/mllm/backends/qnn/QNNUtils.hpp index 5c0483dfb..99695e784 100644 --- a/mllm/backends/qnn/QNNUtils.hpp +++ b/mllm/backends/qnn/QNNUtils.hpp @@ -140,7 +140,7 @@ inline void __mllmQnnLoggerCallback(const char* fmt, QnnLog_Level_t level, uint6 inline const std::string QNN_QUANT_SCALE_NAME = "qnn_quant_scale"; inline float getQuantScale(Tensor& tensor) { if (!tensor.attachedViews().contains(QNN_QUANT_SCALE_NAME)) { return 0.0f; } - return tensor.attachedViews()[QNN_QUANT_SCALE_NAME]->ptr()[0]; + return tensor.attachedViews()[QNN_QUANT_SCALE_NAME].second->ptr()[0]; } inline 
void setQuantScale(Tensor& tensor, float scale) { @@ -149,7 +149,7 @@ inline void setQuantScale(Tensor& tensor, float scale) { t.at({0}) = scale; tensor.attach(QNN_QUANT_SCALE_NAME, t.impl()); } else { - tensor.attachedViews()[QNN_QUANT_SCALE_NAME]->ptr()[0] = scale; + tensor.attachedViews()[QNN_QUANT_SCALE_NAME].second->ptr()[0] = scale; } } diff --git a/mllm/backends/qnn/aot/passes/AOTPipeline.cpp b/mllm/backends/qnn/aot/passes/AOTPipeline.cpp index c60c6aa78..af534a4e9 100644 --- a/mllm/backends/qnn/aot/passes/AOTPipeline.cpp +++ b/mllm/backends/qnn/aot/passes/AOTPipeline.cpp @@ -24,8 +24,8 @@ std::vector> createQnnAOTLoweringPipeline(QnnAOTEnv* e ret.emplace_back(createMergeLLMHeadIntoMainGraphPass()); ret.emplace_back(createLLMQuantRecipePass()); ret.emplace_back(createPTQPass()); - // ret.emplace_back(createSplitLLMGraphPass()); - // ret.emplace_back(createMarkTensorIOPass()); + ret.emplace_back(createSplitLLMGraphPass()); + ret.emplace_back(createMarkTensorIOPass()); // ret.emplace_back(createLLM2QnnLoweringPass()); } else { MLLM_WARN("This pass currently only supports LLM applications. 
Please ensure your config contains 'quant_recipe.llm_recipe " diff --git a/mllm/compile/ir/linalg/Attribute.cpp b/mllm/compile/ir/linalg/Attribute.cpp index 91634afcd..03d3e4c32 100644 --- a/mllm/compile/ir/linalg/Attribute.cpp +++ b/mllm/compile/ir/linalg/Attribute.cpp @@ -121,6 +121,8 @@ void LinalgIRQuantizatonAnnotationAttr::dump(IRPrinter& p) { } ss << ", "; ss << "uuid=" << q->uuid; + ss << ", "; + ss << "solved=" << q->solved; ss << ")"; return ss.str(); }; diff --git a/mllm/core/Tensor.cpp b/mllm/core/Tensor.cpp index ee0d69752..61b5ed65f 100644 --- a/mllm/core/Tensor.cpp +++ b/mllm/core/Tensor.cpp @@ -32,12 +32,12 @@ namespace mllm { void Tensor::operator delete(void* ptr) noexcept { ((Tensor*)ptr)->impl_.reset(); - for (auto& [a, _] : ((Tensor*)ptr)->impl_->attachedViews()) { ((Tensor*)ptr)->impl_->attachedViews()[a].reset(); } + for (auto& [a, _] : ((Tensor*)ptr)->impl_->attachedViews()) { ((Tensor*)ptr)->impl_->attachedViews()[a].second.reset(); } } void Tensor::delete_() noexcept { this->impl_.reset(); - for (auto& [a, _] : this->impl_->attachedViews()) { this->impl_->attachedViews()[a].reset(); } + for (auto& [a, _] : this->impl_->attachedViews()) { this->impl_->attachedViews()[a].second.reset(); } } /** @@ -100,13 +100,13 @@ Tensor& Tensor::allocExtraTensorView(const std::string& extra_tensor_name, const MLLM_RT_ASSERT_EQ(impl_->attachedViews().count(extra_tensor_name), 0); auto storage = TensorStorage::create(shape, dtype, device); auto impl = TensorViewImpl::create(shape, storage); - impl_->attachedViews().insert({extra_tensor_name, impl}); + impl_->attachedViews().insert({extra_tensor_name, {true, impl}}); return *this; } Tensor Tensor::getExtraTensorViewInTensor(const std::string& extra_tensor_name) { MLLM_RT_ASSERT_EQ(impl_->attachedViews().count(extra_tensor_name), 1); - return Tensor(impl_->attachedViews().at(extra_tensor_name)); + return Tensor(impl_->attachedViews().at(extra_tensor_name).second); } Tensor Tensor::zeros(const std::vector& 
shape, DataTypes dtype, DeviceTypes device) { @@ -521,14 +521,19 @@ size_t Tensor::hash() const { std::vector heap_buf; auto* buf = stack_buf; - size_t count = 1 + impl_->attachedViews().size(); + size_t count = 1; + for (const auto& [_, view] : impl_->attachedViews()) { + if (!view.first) { count++; } + } if (count > kStackCap) { heap_buf.resize(count); buf = heap_buf.data(); } buf[0] = uuid(); size_t idx = 1; - for (const auto& [_, view] : impl_->attachedViews()) { buf[idx++] = view ? view->uuid() : 0u; } + for (const auto& [_, view] : impl_->attachedViews()) { + if (!view.first) { buf[idx++] = view.second ? view.second->uuid() : 0u; } + } return XXH64(buf, count * sizeof(uint32_t), 0); } diff --git a/mllm/core/Tensor.hpp b/mllm/core/Tensor.hpp index 96a375622..90457721f 100644 --- a/mllm/core/Tensor.hpp +++ b/mllm/core/Tensor.hpp @@ -698,9 +698,13 @@ class Tensor { return *(const_cast(this)->offsettedPtr(offsets)); } - [[nodiscard]] std::unordered_map& attachedViews() { return impl_->attachedViews(); } + [[nodiscard]] std::unordered_map>& attachedViews() { + return impl_->attachedViews(); + } - void attach(const std::string& name, const TensorViewImpl::ptr_t& view) { impl_->attachedViews()[name] = view; } + void attach(const std::string& name, const TensorViewImpl::ptr_t& view, bool exclude_from_hash = false) { + impl_->attachedViews()[name] = {exclude_from_hash, view}; + } private: template diff --git a/mllm/core/TensorViewImpl.hpp b/mllm/core/TensorViewImpl.hpp index 4b7b146b7..61a9fc285 100644 --- a/mllm/core/TensorViewImpl.hpp +++ b/mllm/core/TensorViewImpl.hpp @@ -89,7 +89,7 @@ class TensorViewImpl : public std::enable_shared_from_this { inline void dropStorage() { storage_ = nullptr; } - inline std::unordered_map& attachedViews() { return attached_views_; } + inline std::unordered_map>& attachedViews() { return attached_views_; } private: int32_t shape_len_ = 0; @@ -97,7 +97,9 @@ class TensorViewImpl : public std::enable_shared_from_this { int32_t 
shape_[MLLM_TENSOR_SHAPE_MAX_LEN]; int32_t stride_[MLLM_TENSOR_SHAPE_MAX_LEN]; std::shared_ptr storage_ = nullptr; - std::unordered_map attached_views_; + + // std::pair's bool for judge if this tensor should be considered in hashing + std::unordered_map> attached_views_; }; } // namespace mllm From ecbef680c0266c3d2825346e126fd5e7800f876c Mon Sep 17 00:00:00 2001 From: chenghuaWang <2923277184@qq.com> Date: Mon, 5 Jan 2026 08:22:47 +0000 Subject: [PATCH 10/13] fix: AOT Pipeline pass --- examples/qwen3_qnn_aot/compile.cpp | 21 +- .../qwen3_qnn_aot/modeling_qwen_qnn_aot.hpp | 76 ++----- mllm/backends/qnn/aot/passes/AOTPipeline.cpp | 5 +- mllm/backends/qnn/aot/passes/AOTPipeline.hpp | 4 +- .../qnn/aot/passes/LLMQuantRecipePass.cpp | 113 +++------- mllm/backends/qnn/aot/passes/PTQPass.cpp | 207 +++++++++++++++++- mllm/compile/ir/linalg/Attribute.cpp | 2 + mllm/core/Tensor.cpp | 9 +- mllm/core/Tensor.hpp | 10 + .../qualcomm/transformers/core/qlinear.py | 55 +++++ .../qualcomm/transformers/core/rms_norm.py | 64 +++++- .../transformers/qwen3/modeling_qwen3.py | 22 ++ .../qualcomm/transformers/qwen3/runner.py | 13 ++ .../qualcomm/transformers/qwen3/train.py | 5 + pymllm/convertor/mllm_type_mapping.py | 1 + 15 files changed, 452 insertions(+), 155 deletions(-) diff --git a/examples/qwen3_qnn_aot/compile.cpp b/examples/qwen3_qnn_aot/compile.cpp index 26f10be05..427d6b0e7 100644 --- a/examples/qwen3_qnn_aot/compile.cpp +++ b/examples/qwen3_qnn_aot/compile.cpp @@ -37,17 +37,6 @@ MLLM_MAIN({ auto model_cfg = mllm::models::qwen3::Qwen3Config(model_cfg_path.get()); auto model = mllm::models::qwen3::Qwen3ForCausalLM(model_cfg); auto params = mllm::load(model_path.get(), mllm::ModelFileVersion::kV2); - - // Gen sin and cos - { - auto inv = mllm::models::qwen3::makeRoPEInvFreq(model_cfg.head_dim, model_cfg.rope_theta); - auto position_ids = mllm::Tensor::empty({CL}, mllm::kInt32, mllm::kCPU).alloc(); - auto position_ids_ptr = position_ids.ptr(); - for (int s = 0; s < CL; ++s) { 
position_ids_ptr[s] = s; } - auto [rope_sin, rope_cos] = mllm::models::qwen3::makeRotaryPosEmbedding(position_ids, inv, 1.f); - params->push("rope_sin", rope_sin.to(mllm::kUInt16PerTensorSym).setMemType(mllm::kParamsNormal).setName("rope_sin")); - params->push("rope_cos", rope_cos.to(mllm::kUInt16PerTensorSym).setMemType(mllm::kParamsNormal).setName("rope_cos")); - } model.load(params); // Sequence: [B, N] @@ -72,8 +61,14 @@ MLLM_MAIN({ model_cfg.num_key_value_heads, model_cfg.head_dim, CL - N, - }, mllm::kInt8PerTensorSym); + }, mllm::kUInt8PerTensorSym); trace_inputs[past_value_name] = mllm::Tensor::empty({1, model_cfg.num_key_value_heads, CL - N, model_cfg.head_dim}, mllm::kUInt8PerTensorSym); + + trace_inputs[past_key_name].attach("scale", params->pull("model.layers." + std::to_string(i) + ".self_attn.k_cast_to_int8_qdq.fake_quant.scale").impl(), true); + trace_inputs[past_key_name].attach("zero_point", params->pull("model.layers." + std::to_string(i) + ".self_attn.k_cast_to_int8_qdq.fake_quant.zero_point").impl(), true); + + trace_inputs[past_value_name].attach("scale", params->pull("model.layers." + std::to_string(i) + ".self_attn.v_cast_to_int8_qdq.fake_quant.scale").impl(), true); + trace_inputs[past_value_name].attach("zero_point", params->pull("model.layers." 
+ std::to_string(i) + ".self_attn.v_cast_to_int8_qdq.fake_quant.zero_point").impl(), true); // clang-format on } @@ -84,7 +79,7 @@ MLLM_MAIN({ mllm::qnn::aot::parseQcomTargetMachineFromJSONFile(qnn_aot_cfg_files.get())); mllm::ir::PassManager pm(ir["model"]); - pm.reg(mllm::qnn::aot::createQnnAOTLoweringPipeline(&qnn_aot_env, qnn_aot_cfg_files.get())); + pm.reg(mllm::qnn::aot::createQnnAOTLoweringPipeline(&qnn_aot_env, qnn_aot_cfg_files.get(), params)); pm.run(); mllm::redirect("qwen3_qnn_aot.mir", [&]() { mllm::print(ir["model"]); }); diff --git a/examples/qwen3_qnn_aot/modeling_qwen_qnn_aot.hpp b/examples/qwen3_qnn_aot/modeling_qwen_qnn_aot.hpp index 5677d27f2..14241684a 100644 --- a/examples/qwen3_qnn_aot/modeling_qwen_qnn_aot.hpp +++ b/examples/qwen3_qnn_aot/modeling_qwen_qnn_aot.hpp @@ -88,64 +88,30 @@ Tensor QDQ_KV(nn::Module* m, Tensor in, const std::string& qdq_name_in_pytorch) return in; } -} // namespace ptq +Tensor QDQ_ROPE(nn::Module* m, Tensor in, const std::string& qdq_name_in_pytorch) { + auto scale_name = m->getModuleName() + "." + qdq_name_in_pytorch + ".fake_quant.scale"; + auto zp_name = m->getModuleName() + "." 
+ qdq_name_in_pytorch + ".fake_quant.zero_point"; -inline auto makeRoPEInvFreq(int output_dim, float rope_theta) -> Tensor { - auto inv_freq = Tensor::empty({output_dim / 2}, kFloat32, kCPU).alloc(); - auto inv_freq_ptr = inv_freq.ptr(); - for (int i = 0; i < output_dim / 2; i++) { inv_freq_ptr[i] = 1.0 / std::pow(rope_theta, 2.0 * i / output_dim); } - return inv_freq; -} + (void)in.__unsafeSetDType(kUInt16PerTensorAsy); -inline auto makeRotaryPosEmbedding(Tensor& position_ids, const Tensor& inv_freq, - float attention_scaling = 1.0f) -> std::pair { - auto batch_size = position_ids.shape()[0]; - auto seq_len = position_ids.shape()[1]; - auto inv_freq_len = inv_freq.shape()[0]; - auto dim = inv_freq_len * 2; - - // Create freqs tensor: position_ids @ inv_freq - auto freqs = Tensor::empty({batch_size, seq_len, inv_freq_len}, kFloat32, kCPU).alloc(); - auto freqs_ptr = freqs.ptr(); - auto position_ids_ptr = position_ids.ptr(); - auto inv_freq_ptr = inv_freq.ptr(); - - // Compute freqs = position_ids[:, :, None] @ inv_freq[None, :] - for (int b = 0; b < batch_size; ++b) { - for (int s = 0; s < seq_len; ++s) { - auto pos = position_ids_ptr[b * seq_len + s]; - for (int d = 0; d < inv_freq_len; ++d) { - freqs_ptr[b * seq_len * inv_freq_len + s * inv_freq_len + d] = static_cast(pos) * inv_freq_ptr[d]; - } + switch (in.dtype()) { + case kUInt16PerTensorAsy: { + auto scale = m->getTopParameterFile()->pull(scale_name); + auto zp = m->getTopParameterFile()->pull(zp_name); + in.attach("scale", scale.impl(), true); + in.attach("zero_point", zp.impl(), true); + break; } - } - - // Create sin and cos tensors with shape [batch_size, seq_len, dim] - auto sin_emb = Tensor::empty({batch_size, seq_len, dim}, kFloat32, kCPU).alloc(); - auto cos_emb = Tensor::empty({batch_size, seq_len, dim}, kFloat32, kCPU).alloc(); - auto sin_ptr = sin_emb.ptr(); - auto cos_ptr = cos_emb.ptr(); - - // Compute sin and cos embeddings: emb = [freqs, freqs] - for (int b = 0; b < batch_size; ++b) { - for 
(int s = 0; s < seq_len; ++s) { - for (int d = 0; d < inv_freq_len; ++d) { - auto freq = freqs_ptr[b * seq_len * inv_freq_len + s * inv_freq_len + d]; - auto sin_val = std::sin(freq) * attention_scaling; - auto cos_val = std::cos(freq) * attention_scaling; - - // Store the same values in both halves: [freqs, freqs] - sin_ptr[b * seq_len * dim + s * dim + d] = sin_val; - sin_ptr[b * seq_len * dim + s * dim + d + inv_freq_len] = sin_val; - cos_ptr[b * seq_len * dim + s * dim + d] = cos_val; - cos_ptr[b * seq_len * dim + s * dim + d + inv_freq_len] = cos_val; - } + default: { + MLLM_ERROR_EXIT(ExitCode::kCoreError, "Can't Process dtype={}", nameOfType(in.dtype())); } } - return {sin_emb, cos_emb}; + return in; } +} // namespace ptq + class Qwen3MLP final : public nn::Module { nn::Linear gate_proj_; nn::Linear up_proj_; @@ -357,8 +323,8 @@ class Qwen3Text final : public nn::Module { for (auto [idx, b] : enumerate(decode_blocks_.list())) { b.self_attn_.layer_idx_ = idx; } norm_ = reg("norm", cfg.rms_norm_eps); embedding_ = reg("embed_tokens", cfg.vocab_size, cfg.hidden_size); - rope_sin_ = reg("rope_sin", "rope_sin"); - rope_cos_ = reg("rope_cos", "rope_cos"); + rope_sin_ = reg("mllm_max_sin_embedding", "model.mllm_max_sin_embedding"); + rope_cos_ = reg("mllm_max_cos_embedding", "model.mllm_max_cos_embedding"); } std::vector forward(const std::vector& inputs, const std::vector& args) override { @@ -372,8 +338,8 @@ class Qwen3Text final : public nn::Module { auto position_ids = inputs[1]; auto causal_mask = inputs[2]; - auto llm_embedding_sin = rope_sin_()[{{0}, position_ids, {kAll}}]; - auto llm_embedding_cos = rope_cos_()[{{0}, position_ids, {kAll}}]; + auto llm_embedding_sin = ptq::QDQ_ROPE(this, rope_sin_(), "sin_embedding_input_qdq")[{{0}, position_ids, {kAll}}]; + auto llm_embedding_cos = ptq::QDQ_ROPE(this, rope_cos_(), "cos_embedding_input_qdq")[{{0}, position_ids, {kAll}}]; std::vector keys; std::vector values; @@ -477,7 +443,7 @@ class Qwen3ForCausalLM : public 
ARGeneration, public nn::Module { sequence = llm(llm_inputs)[0]; sequence = lm_head_(ptq::QDQ(this, sequence, "lm_head_input_qdq")); - ptq::QDQ(this, sequence, "lm_head_output_qdq"); + sequence = ptq::QDQ(this, sequence, "lm_head_output_qdq"); ir::lowlevel::traceComment(" ╔═════╗ "); ir::lowlevel::traceComment(" ║ o o ║ "); ir::lowlevel::traceComment(" ║ ▽ ║ "); diff --git a/mllm/backends/qnn/aot/passes/AOTPipeline.cpp b/mllm/backends/qnn/aot/passes/AOTPipeline.cpp index af534a4e9..80fb94ba9 100644 --- a/mllm/backends/qnn/aot/passes/AOTPipeline.cpp +++ b/mllm/backends/qnn/aot/passes/AOTPipeline.cpp @@ -8,13 +8,16 @@ #include "mllm/backends/qnn/aot/passes/OpNamingPass.hpp" #include "mllm/backends/qnn/aot/passes/PTQPass.hpp" #include "mllm/backends/qnn/aot/passes/SplitLLMGraphPass.hpp" +#include "mllm/core/ParameterFile.hpp" namespace mllm::qnn::aot { -std::vector> createQnnAOTLoweringPipeline(QnnAOTEnv* env, const std::string& config_path) { +std::vector> createQnnAOTLoweringPipeline(QnnAOTEnv* env, const std::string& config_path, + const ParameterFile::ptr_t& pf) { std::vector ret; AOTCompileContext::getInstance().setEnv(env); AOTCompileContext::getInstance().setConfig(config_path); + AOTCompileContext::getInstance().setParamFile(pf); auto config = AOTCompileContext::getInstance().getConfig(); if (config.contains("quant_recipe") && config["quant_recipe"].contains("llm_recipe") diff --git a/mllm/backends/qnn/aot/passes/AOTPipeline.hpp b/mllm/backends/qnn/aot/passes/AOTPipeline.hpp index 0b14f0c11..d854de974 100644 --- a/mllm/backends/qnn/aot/passes/AOTPipeline.hpp +++ b/mllm/backends/qnn/aot/passes/AOTPipeline.hpp @@ -7,9 +7,11 @@ #include "mllm/backends/qnn/aot/QnnWrappersAPI.hpp" #include "mllm/compile/passes/Pass.hpp" +#include "mllm/core/ParameterFile.hpp" namespace mllm::qnn::aot { -std::vector> createQnnAOTLoweringPipeline(QnnAOTEnv* env, const std::string& config_path); +std::vector> createQnnAOTLoweringPipeline(QnnAOTEnv* env, const std::string& config_path, 
+ const ParameterFile::ptr_t& pf); } // namespace mllm::qnn::aot diff --git a/mllm/backends/qnn/aot/passes/LLMQuantRecipePass.cpp b/mllm/backends/qnn/aot/passes/LLMQuantRecipePass.cpp index adada76ed..f60ecc14d 100644 --- a/mllm/backends/qnn/aot/passes/LLMQuantRecipePass.cpp +++ b/mllm/backends/qnn/aot/passes/LLMQuantRecipePass.cpp @@ -387,8 +387,9 @@ bool LLMQuantRecipeRMSNormPattern::rewrite(ir::IRWriter& writer, const ir::op_pt MLLM_RETURN_FALSE_IF_NOT(weight_reg_tensor_ir->outputs().front()->isa_()); auto t = weight_reg_tensor_ir->outputs().front()->cast_(); - auto weight_spec_attr = cloneQuantizationSpecType( - writer.getContext(), node->inputs().front()->getAttr("quant_recipe")->cast_()); + // FIXME: This dtype is hardcoded. We should make it right. + auto weight_spec_attr = writer.create( + ir::linalg::QuantizationSpecAsymPerTensor::create(0, 65536, kUInt16, kFloat32, kInt32, Tensor::nil(), Tensor::nil())); weight_reg_tensor_ir->outputs().front()->setAttr("quant_recipe", weight_spec_attr); // Get self anno @@ -421,26 +422,15 @@ bool LLMQuantRecipeIndexPattern::isMatch(const mllm::ir::op_ptr_t& op) { bool LLMQuantRecipeIndexPattern::rewrite(ir::IRWriter& writer, const ir::op_ptr_t& node) { auto index_ir = node->cast_(); - auto i_0 = *(node->inputs().begin()); // Index what - auto o_0 = *(node->outputs().begin()); // Output + auto i_0 = *(node->inputs().begin()); // Index what if (!i_0->getAttr("quant_recipe")) { auto i_0_spec = genSimpleQuantizationSpecAttr(writer.getContext(), i_0->cast_()); i_0->setAttr("quant_recipe", i_0_spec); } - auto o_0_spec = genSimpleQuantizationSpecAttr(writer.getContext(), o_0->cast_()); - o_0->setAttr("quant_recipe", o_0_spec); - - auto annotation_attr = writer.create(); - annotation_attr->annotation_.inputs.emplace_back( - i_0->getAttr("quant_recipe")->cast_()->spec_); - annotation_attr->annotation_.outputs.emplace_back( - o_0->getAttr("quant_recipe")->cast_()->spec_); - - node->setAttr("quant_recipe", annotation_attr); - - 
return true; + return shareQuantSpecSingleInputToSingleOutputAndSetOpQuantAnnoAttr(writer.getContext(), + node->cast_()); } //===----------------------------------------------------------------------===// @@ -848,85 +838,54 @@ bool LLMQuantRecipeViewPattern::rewrite(ir::IRWriter& writer, const ir::op_ptr_t bool LLMQuantRecipeEmbeddingPattern::isMatch(const mllm::ir::op_ptr_t& op) { // Pattern: // - // embedding(op) -> quantize(op) + // embedding(op) MLLM_RETURN_FALSE_IF_NOT(op->isa_()); - MLLM_RETURN_FALSE_IF_NOT(op->nextOp()); - MLLM_RETURN_FALSE_IF_NOT(op->nextOp()->isa_()); // Already marked. MLLM_RETURN_FALSE_IF(op->getAttr("quant_recipe")); - MLLM_RETURN_FALSE_IF(op->nextOp()->getAttr("quant_recipe")); return true; } bool LLMQuantRecipeEmbeddingPattern::rewrite(ir::IRWriter& writer, const ir::op_ptr_t& node) { auto embedding_op = node->cast_(); - auto quantize_op = embedding_op->nextOp()->cast_(); + auto i_0 = *(node->inputs().begin()); + auto o_0 = *(node->outputs().begin()); auto annotation_attr = writer.create(); - // Inputs to this Quantization node must be raw type. 
- { - auto i_type = quantize_op->inputs().front()->cast_()->tensor_.dtype(); - MLLM_RT_ASSERT(i_type == kFloat32 || i_type == kFloat16); - auto i_quant_spec = ir::linalg::QuantizationSpecRaw::create(i_type); - annotation_attr->annotation_.inputs.emplace_back(i_quant_spec); - quantize_op->inputs().front()->setAttr("quant_recipe", - writer.create(i_quant_spec)); + if (!i_0->getAttr("quant_recipe")) { + auto i_0_spec = genSimpleQuantizationSpecAttr(writer.getContext(), i_0->cast_()); + i_0->setAttr("quant_recipe", i_0_spec); + } else { + annotation_attr->annotation_.inputs.emplace_back( + i_0->getAttr("quant_recipe")->cast_()->spec_); } - // Outputs to this Quantization node must be int8 or int16 - { - auto o_type = quantize_op->outputs().front()->cast_()->tensor_.dtype(); - ir::linalg::QuantizationSpec::ptr_t o_quant_spec = nullptr; - switch (o_type) { - case kInt8PerTensorSym: { - o_quant_spec = ir::linalg::QuantizationSpecSymPerTensor::create(-128, 127, kInt8, kFloat32, Tensor::nil()); - break; - } - case kUInt8PerTensorSym: { - o_quant_spec = ir::linalg::QuantizationSpecSymPerTensor::create(0, 255, kUInt8, kFloat32, Tensor::nil()); - break; - } - case kInt16PerTensorSym: { - o_quant_spec = ir::linalg::QuantizationSpecSymPerTensor::create(-32768, 32767, kInt16, kFloat32, Tensor::nil()); - break; - } - case kUInt16PerTensorSym: { - o_quant_spec = ir::linalg::QuantizationSpecSymPerTensor::create(0, 65535, kUInt16, kFloat32, Tensor::nil()); - break; - } - case kUInt16PerTensorAsy: { - o_quant_spec = ir::linalg::QuantizationSpecAsymPerTensor::create(0, 65535, kUInt16, kFloat32, kInt32, Tensor::nil(), - Tensor::nil()); - break; - } - default: { - NYI("Only support [uint16, int16, uint8, int8], [sym] for now."); - } - } - - // Weights - auto weight_name = embedding_op->getAOp()->getName() + ".weight"; - auto weight_reg_tensor_ir = writer.getContext()->lookupSymbolTable(weight_name); - MLLM_RETURN_FALSE_IF_NOT(weight_reg_tensor_ir); - 
MLLM_RETURN_FALSE_IF_NOT(weight_reg_tensor_ir->isa_()); - MLLM_RETURN_FALSE_IF_NOT(weight_reg_tensor_ir->outputs().front()->isa_()); - auto weight_tensor = weight_reg_tensor_ir->outputs().front()->cast_(); - - annotation_attr->annotation_.outputs.emplace_back(o_quant_spec); - quantize_op->outputs().front()->setAttr("quant_recipe", - writer.create(o_quant_spec)); - - // Embedding weight quantization method same as outputs, but not share, just same type - auto weight_spec_attr = genSimpleQuantizationSpecAttr(writer.getContext(), weight_tensor); - weight_reg_tensor_ir->outputs().front()->setAttr("quant_recipe", weight_spec_attr); - annotation_attr->annotation_.weights.insert({"weight", weight_spec_attr->spec_}); + if (!o_0->getAttr("quant_recipe")) { + auto o_0_spec = genSimpleQuantizationSpecAttr(writer.getContext(), o_0->cast_()); + o_0->setAttr("quant_recipe", o_0_spec); + annotation_attr->annotation_.outputs.emplace_back(o_0_spec->spec_); + } else { + annotation_attr->annotation_.inputs.emplace_back( + o_0->getAttr("quant_recipe")->cast_()->spec_); } + // Weights + auto weight_name = embedding_op->getAOp()->getName() + ".weight"; + auto weight_reg_tensor_ir = writer.getContext()->lookupSymbolTable(weight_name); + MLLM_RETURN_FALSE_IF_NOT(weight_reg_tensor_ir); + MLLM_RETURN_FALSE_IF_NOT(weight_reg_tensor_ir->isa_()); + MLLM_RETURN_FALSE_IF_NOT(weight_reg_tensor_ir->outputs().front()->isa_()); + auto weight_tensor = weight_reg_tensor_ir->outputs().front()->cast_(); + + // Embedding weight quantization method same as outputs, but not share, just same type + auto weight_spec_attr = genSimpleQuantizationSpecAttr(writer.getContext(), weight_tensor); + weight_reg_tensor_ir->outputs().front()->setAttr("quant_recipe", weight_spec_attr); + annotation_attr->annotation_.weights.insert({"weight", weight_spec_attr->spec_}); + // Attach to quantize node - node->nextOp()->setAttr("quant_recipe", annotation_attr); + node->setAttr("quant_recipe", annotation_attr); return true; } 
diff --git a/mllm/backends/qnn/aot/passes/PTQPass.cpp b/mllm/backends/qnn/aot/passes/PTQPass.cpp index 9d4cabee3..0539b23a2 100644 --- a/mllm/backends/qnn/aot/passes/PTQPass.cpp +++ b/mllm/backends/qnn/aot/passes/PTQPass.cpp @@ -1,28 +1,229 @@ // Copyright (c) MLLM Team. // Licensed under the MIT License. +#include + #include "mllm/backends/qnn/aot/passes/PTQPass.hpp" #include "mllm/backends/qnn/aot/passes/AOTCompileContext.hpp" #include "mllm/compile/ir/builtin/Op.hpp" #include "mllm/compile/ir/graph/Op.hpp" +#include "mllm/compile/ir/linalg/Attribute.hpp" #include "mllm/compile/ir/linalg/Op.hpp" #include "mllm/compile/ir/tensor/Value.hpp" #include "mllm/compile/ir/cf/Op.hpp" #include "mllm/compile/ir/Node.hpp" #include "mllm/core/OpTypes.hpp" +#include "mllm/core/ParameterFile.hpp" #include "mllm/utils/Common.hpp" namespace mllm::qnn::aot { namespace { -void solveStaticWeights() {} +template +void checkTypeLimits(Tensor in, int quant_min, int quant_max) { // NOLINT + auto numel = in.numel(); + for (int i = 0; i < numel; ++i) { + MLLM_RT_ASSERT(*(in.ptr() + i) >= quant_min); + MLLM_RT_ASSERT(*(in.ptr() + i) <= quant_max); + } +} + +void solveLinearWeight(const ir::IRContext::ptr_t& ctx, const ParameterFile::ptr_t& pf, + const ir::linalg::LinalgIROp::ptr_t& op) { + auto mllm_op = op->getAOp(); + MLLM_INFO("PTQPass working on Op: {}'s weight", mllm_op->getName()); + auto weight_spec = + op->getAttr("quant_recipe")->cast_()->annotation_.weights.at("weight"); + + if (weight_spec->solved) return; + + switch (weight_spec->type) { + case ir::linalg::QuantizationSpecType::kLPBQ: { + auto this_spec = std::static_pointer_cast(weight_spec); + auto scale1 = pf->pull(mllm_op->getName() + ".scale1"); // using uint8 to store uint4 + auto scale2 = pf->pull(mllm_op->getName() + ".scale2"); + auto weight = pf->pull(mllm_op->getName() + ".weight"); + + // FIXME weight maybe error, Check qnn eats int8 or uint8. Here weight using int8 to store int4. 
+ checkTypeLimits(weight, -8, 7); // Int4 + checkTypeLimits(scale1, 0, 16); // UInt4 + + this_spec->scale_level_0_int = scale1; + this_spec->scale_level_1_fp = scale2; + + weight_spec->solved = true; + break; + } + default: { + NYI("quant recipe type not support"); + } + } +} + +void solveRMSNormWeight(const ir::IRContext::ptr_t& ctx, const ParameterFile::ptr_t& pf, + const ir::linalg::LinalgIROp::ptr_t& op) { + auto mllm_op = op->getAOp(); + MLLM_INFO("PTQPass working on Op: {}'s weight", mllm_op->getName()); + auto weight_spec = + op->getAttr("quant_recipe")->cast_()->annotation_.weights.at("weight"); + + if (weight_spec->solved) return; + + switch (weight_spec->type) { + case ir::linalg::QuantizationSpecType::kRaw: { + weight_spec->solved = true; + break; + } + case ir::linalg::QuantizationSpecType::kAsymPerTensor: { + auto this_spec = std::static_pointer_cast(weight_spec); + auto scale = pf->pull(mllm_op->getName() + ".scale"); + auto zero_point = pf->pull(mllm_op->getName() + ".zero_point"); + this_spec->scale = scale; + this_spec->zero_point = zero_point; + checkTypeLimits(pf->pull(mllm_op->getName() + ".weight"), this_spec->quant_min, this_spec->quant_max); + MLLM_RT_ASSERT(scale.dtype() == kFloat32); + MLLM_RT_ASSERT(scale.rank() == 1); + MLLM_RT_ASSERT(scale.item() > 0); + MLLM_RT_ASSERT(zero_point.dtype() == kInt32); + MLLM_RT_ASSERT(zero_point.rank() == 1); + MLLM_RT_ASSERT(zero_point.item() >= 0); + weight_spec->solved = true; + break; + } + default: { + NYI("quant recipe type not support"); + } + } +} + +void solveEmbeddingWeight(const ir::IRContext::ptr_t& ctx, const ParameterFile::ptr_t& pf, + const ir::linalg::LinalgIROp::ptr_t& op) { + auto mllm_op = op->getAOp(); + MLLM_INFO("PTQPass working on Op: {}'s weight", mllm_op->getName()); + auto weight_spec = + op->getAttr("quant_recipe")->cast_()->annotation_.weights.at("weight"); + + if (weight_spec->solved) return; + + switch (weight_spec->type) { + case ir::linalg::QuantizationSpecType::kRaw: { + 
weight_spec->solved = true; + break; + } + default: { + NYI("quant recipe type not support"); + } + } +} + +void recursiveSolveWeights(const std::shared_ptr& ir_ctx, const ir::graph::SubGraphOp::ptr_t& call_op, + const ParameterFile::ptr_t& pf) { + auto wow = ir::IRWriter(ir_ctx, call_op->getTopRegion()); + wow.walk([&](ir::IRWriter& w, const ir::Op::ptr_t& op) -> ir::IRWriter::WalkResult { + if (op->isa_()) { solveLinearWeight(w.getContext(), pf, op->cast_()); } + if (op->isa_()) { solveRMSNormWeight(w.getContext(), pf, op->cast_()); } + if (op->isa_()) { solveEmbeddingWeight(w.getContext(), pf, op->cast_()); } + if (op->isa_()) { + auto ns = op->cast_()->getSymbolAttr()->str(); + recursiveSolveWeights(w.getContext(), w.getContext()->lookupSymbolTable(ns)->cast_(), pf); + } + return ir::IRWriter::WALK_CONTINUE; + }); +} -void solveStaticRoPE() {} +void __recursiveSolveNormalImpl(const ir::Val::ptr_t& v) { + MLLM_RT_ASSERT(v->isa_()); + auto tv = v->cast_(); + MLLM_RT_ASSERT(tv->getAttr("quant_recipe")); + auto f_spec = tv->getAttr("quant_recipe")->cast_(); + + if (f_spec->spec_->solved) { return; } + + switch (f_spec->spec_->type) { + case ir::linalg::QuantizationSpecType::kAsymPerTensor: { + if (!tv->tensor_.hasAttachedView("scale") || !tv->tensor_.hasAttachedView("zero_point")) { return; } + auto scale = tv->tensor_.getExtraTensorViewInTensor("scale"); + auto zero_point = tv->tensor_.getExtraTensorViewInTensor("zero_point"); + auto this_spec = std::static_pointer_cast(f_spec->spec_); + this_spec->scale = scale; + this_spec->zero_point = zero_point; + this_spec->solved = true; + break; + } + case ir::linalg::QuantizationSpecType::kSymPerTensor: { + if (!tv->tensor_.hasAttachedView("scale")) { return; } + auto scale = tv->tensor_.getExtraTensorViewInTensor("scale"); + auto this_spec = std::static_pointer_cast(f_spec->spec_); + this_spec->scale = scale; + this_spec->solved = true; + break; + } + case ir::linalg::QuantizationSpecType::kRaw: { + auto this_spec = 
std::static_pointer_cast(f_spec->spec_); + this_spec->solved = true; + break; + } + default: { + NYI("quant recipe type not support on tensor: {}", v->name()); + } + } +} + +void recursiveSolveNormal(const std::shared_ptr& ir_ctx, const ir::graph::SubGraphOp::ptr_t& call_op, + const ParameterFile::ptr_t& pf) { + auto wow = ir::IRWriter(ir_ctx, call_op->getTopRegion()); + wow.walk([&](ir::IRWriter& w, const ir::Op::ptr_t& op) -> ir::IRWriter::WalkResult { + if (op->isa_()) { + MLLM_INFO("PTQPass relax working on Op: {}'s tensors", op->cast_()->getAOp()->getName()); + + auto inputs = op->inputs(); + auto outputs = op->outputs(); + + for (auto iii : inputs) { __recursiveSolveNormalImpl(iii->cast_()); } + for (auto ooo : inputs) { __recursiveSolveNormalImpl(ooo->cast_()); } + } + + if (op->isa_()) { + auto ns = op->cast_()->getSymbolAttr()->str(); + recursiveSolveNormal(w.getContext(), w.getContext()->lookupSymbolTable(ns)->cast_(), pf); + } + return ir::IRWriter::WALK_CONTINUE; + }); +} } // namespace -uint8_t PTQPass::run(const ir::node_ptr_t& op) { return ir::PASS_RET_SUCCESS; } +uint8_t PTQPass::run(const ir::node_ptr_t& op) { + auto pf = AOTCompileContext::getInstance().getParamFile(); + + // The top op should be ModuleOp + MLLM_RT_ASSERT(op->isa_()); + + auto module_op = op->cast_(); + auto writer = ir::IRWriter(getCtx(), module_op->getTopRegion()); + + ir::graph::CallGraphOp::ptr_t call_main_graph_op = nullptr; + writer.walk( + [&](ir::IRWriter& /*writer*/, const ir::graph::CallGraphOp::ptr_t& call_op) -> ir::IRWriter::WalkResult { + MLLM_RT_ASSERT_EQ(call_main_graph_op, nullptr); + + call_main_graph_op = call_op; + return ir::IRWriter::WalkResult::WALK_CONTINUE; + }); + + // Solve all registered weight + recursiveSolveWeights(writer.getContext(), + getCtx()->lookupSymbolTable(call_main_graph_op->getSymbolAttr()->str())->cast_(), + pf); + + // Solve other normal tensors + recursiveSolveNormal(writer.getContext(), + 
getCtx()->lookupSymbolTable(call_main_graph_op->getSymbolAttr()->str())->cast_(), + pf); + + return ir::PASS_RET_SUCCESS; +} ir::Pass::ptr_t createPTQPass() { return std::make_shared(); } diff --git a/mllm/compile/ir/linalg/Attribute.cpp b/mllm/compile/ir/linalg/Attribute.cpp index 03d3e4c32..09b35fc96 100644 --- a/mllm/compile/ir/linalg/Attribute.cpp +++ b/mllm/compile/ir/linalg/Attribute.cpp @@ -264,6 +264,8 @@ void LinalgIRQuantizatonSpecAttr::dump(IRPrinter& p) { } ss << ", "; ss << "uuid=" << q->uuid; + ss << ", "; + ss << "solved=" << q->solved; ss << ")"; return ss.str(); }; diff --git a/mllm/core/Tensor.cpp b/mllm/core/Tensor.cpp index 61b5ed65f..4c51c1be8 100644 --- a/mllm/core/Tensor.cpp +++ b/mllm/core/Tensor.cpp @@ -105,7 +105,9 @@ Tensor& Tensor::allocExtraTensorView(const std::string& extra_tensor_name, const } Tensor Tensor::getExtraTensorViewInTensor(const std::string& extra_tensor_name) { - MLLM_RT_ASSERT_EQ(impl_->attachedViews().count(extra_tensor_name), 1); + if (impl_->attachedViews().count(extra_tensor_name) != 1) { + MLLM_ERROR_EXIT(ExitCode::kCoreError, "Can't find {}", extra_tensor_name); + } return Tensor(impl_->attachedViews().at(extra_tensor_name).second); } @@ -503,6 +505,11 @@ Tensor& Tensor::setMemType(TensorMemTypes mem_type) { DataTypes Tensor::dtype() const { return impl()->dtype(); } +Tensor Tensor::__unsafeSetDType(DataTypes dt) const { + impl_->storage()->dtype_ = dt; + return *this; +} + DeviceTypes Tensor::device() const { return impl()->device(); } Tensor::shape_t Tensor::shape() const { return impl()->shape(); } diff --git a/mllm/core/Tensor.hpp b/mllm/core/Tensor.hpp index 90457721f..5046a8e91 100644 --- a/mllm/core/Tensor.hpp +++ b/mllm/core/Tensor.hpp @@ -467,6 +467,14 @@ class Tensor { */ [[nodiscard]] DataTypes dtype() const; + /** + * @brief Unsafe set One Datatype + * + * @param dt + * @return Tensor + */ + [[nodiscard]] Tensor __unsafeSetDType(DataTypes dt) const; + /** * @brief Gets device location. 
* @return Current device type. @@ -702,6 +710,8 @@ class Tensor { return impl_->attachedViews(); } + bool hasAttachedView(const std::string& name) { return impl_->attachedViews().count(name) == 1; } + void attach(const std::string& name, const TensorViewImpl::ptr_t& view, bool exclude_from_hash = false) { impl_->attachedViews()[name] = {exclude_from_hash, view}; } diff --git a/pymllm/backends/qualcomm/transformers/core/qlinear.py b/pymllm/backends/qualcomm/transformers/core/qlinear.py index bbfcc60df..54006a197 100644 --- a/pymllm/backends/qualcomm/transformers/core/qlinear.py +++ b/pymllm/backends/qualcomm/transformers/core/qlinear.py @@ -17,7 +17,9 @@ def __init__(self, in_features, out_features, bias=True): self.act_quant = None self.weight_quant = None + self.deploy_mode = False + @torch.no_grad() def freeze_weight(self): """PTQ Core: Observe current weights, calculate and fix Scale/ZP""" if self.weight_quant is not None: @@ -66,12 +68,49 @@ def __init__(self, in_features, out_features, bias=True): ) def forward(self, x): + assert self.deploy_mode is False # Activation quantization logic (add act_quant here if needed) x_q = x # Apply fake quantization: use fixed scale if frozen, otherwise update in real-time w_q = self.weight_quant(self.weight) return F.linear(x_q, w_q, self.bias) + @torch.no_grad() + def convert_to_deploy(self): + if self.deploy_mode: + return + + # 1. Ensure Observer is frozen + if self.weight_quant.scale is None: + self.freeze_weight() + + scale = self.weight_quant.scale + zero_point = self.weight_quant.zero_point + + # 2. Use PyTorch native API for Per-Channel quantization + # This handles per-channel complexity and returns quantized tensor + w_q_obj = torch.quantize_per_channel( + self.weight.float(), scale, zero_point, axis=0, dtype=torch.qint8 + ) + + # 3. Extract pure integer data + w_int = w_q_obj.int_repr() + + # 4. 
Replace Parameter with Buffer + del self.weight + # Register buffer named 'weight' to maintain name consistency + self.register_buffer("weight", w_int) + self.register_buffer("scale", scale) + self.register_buffer("zero_point", zero_point) + + # Remove fake quant module to reduce model size + del self.weight_quant + + self.deploy_mode = True + print( + f"[{self.__class__.__name__}] Converted to deploy. Weight shape: {self.weight.shape}, dtype: {self.weight.dtype}" + ) + # --- 2. LPBQ (Double Quantization) Scheme --- class DoubleQuantizer(nn.Module): @@ -150,3 +189,19 @@ def forward(self, x): # Must use quantized weights w_q for computation w_q = self.weight_quant(self.weight) return F.linear(x, w_q, self.bias) + + @torch.no_grad() + def convert_to_deploy(self): + if self.deploy_mode: + return + + del self.weight + self.register_buffer("weight", self.weight_quant.weight_q) + self.register_buffer("scale1", self.weight_quant.scale_1_uint4) + self.register_buffer("scale2", self.weight_quant.scale_2_fp32) + del self.weight_quant + + self.deploy_mode = True + print( + f"[{self.__class__.__name__}] Converted to deploy. Original float weight removed." 
+ ) diff --git a/pymllm/backends/qualcomm/transformers/core/rms_norm.py b/pymllm/backends/qualcomm/transformers/core/rms_norm.py index 5606dafaa..ec6345d64 100644 --- a/pymllm/backends/qualcomm/transformers/core/rms_norm.py +++ b/pymllm/backends/qualcomm/transformers/core/rms_norm.py @@ -12,6 +12,7 @@ def __init__( ): super().__init__() self.eps = eps + self.quant_bits = quant_bits if isinstance(normalized_shape, int): normalized_shape = (normalized_shape,) @@ -20,12 +21,12 @@ def __init__( # Quantization configuration for Weight self.weight_fake_quant = FakeQuantize( observer=MinMaxObserver.with_args( - qscheme=torch.per_tensor_symmetric, dtype=torch.qint32 + qscheme=torch.per_tensor_affine, dtype=torch.qint32 ), - quant_min=-(2 ** (quant_bits - 1)), - quant_max=2 ** (quant_bits - 1) - 1, + quant_min=0, + quant_max=2 ** (quant_bits) - 1, dtype=torch.qint32, - qscheme=torch.per_tensor_symmetric, + qscheme=torch.per_tensor_affine, ) def forward(self, x): @@ -42,6 +43,61 @@ def forward(self, x): return (x_normed * w_q).to(input_dtype) + @torch.no_grad() + def convert_to_deploy(self): + """ + In-place replacement of self.weight: + Float Parameter -> Int Buffer + """ + # 1. Ensure quantization parameters are ready + if self.weight_fake_quant.scale is None: + self.freeze_weight() + + scale = self.weight_fake_quant.scale + zero_point = self.weight_fake_quant.zero_point + quant_min = self.weight_fake_quant.quant_min + quant_max = self.weight_fake_quant.quant_max + + # 2. Calculate integer values + # w_int = round(w / s + zp) + w_int = torch.round(self.weight / scale + zero_point).clamp( + quant_min, quant_max + ) + + # 3. Set target integer type + if self.quant_bits <= 8: + target_dtype = torch.int8 + elif self.quant_bits <= 16: + target_dtype = torch.int16 + else: + target_dtype = torch.int32 + + w_int = w_int.to(target_dtype) + + # === Key steps: Replacement operations === + + # A. 
Delete original Parameter 'weight' + # Must delete first, otherwise cannot register buffer with same name + del self.weight + + # B. Register Buffer with same name 'weight' + # This makes state_dict['weight'] become Int Tensor + self.register_buffer("weight", w_int) + + # C. Register Scale (usually needed by engine) + self.register_buffer("scale", scale) + self.register_buffer("zero_point", zero_point) + + # D. Clean up unnecessary modules + if hasattr(self, "weight_fake_quant"): + del self.weight_fake_quant + + class_name = self.__class__.__name__ + instance_class_name = type(self).__name__ + print( + f"Class: {class_name}, Instance: {instance_class_name}, Deploy Mode Activated. 'weight' is now {self.weight.dtype} buffer. zp is {zero_point}" + ) + @torch.no_grad() def freeze_weight(self): """ diff --git a/pymllm/backends/qualcomm/transformers/qwen3/modeling_qwen3.py b/pymllm/backends/qualcomm/transformers/qwen3/modeling_qwen3.py index 5148684af..9c0696328 100644 --- a/pymllm/backends/qualcomm/transformers/qwen3/modeling_qwen3.py +++ b/pymllm/backends/qualcomm/transformers/qwen3/modeling_qwen3.py @@ -473,6 +473,28 @@ def __init__(self, config: Qwen3Config): # Initialize weights and apply final processing self.post_init() + @torch.no_grad() + def convert_rope_for_deploy(self): + sin_scale = self.sin_embedding_input_qdq.fake_quant.scale + sin_zero_point = self.sin_embedding_input_qdq.fake_quant.zero_point + sin_quant_min = self.sin_embedding_input_qdq.fake_quant.quant_min + sin_quant_max = self.sin_embedding_input_qdq.fake_quant.quant_max + + cos_scale = self.cos_embedding_input_qdq.fake_quant.scale + cos_zero_point = self.cos_embedding_input_qdq.fake_quant.zero_point + cos_quant_min = self.cos_embedding_input_qdq.fake_quant.quant_min + cos_quant_max = self.cos_embedding_input_qdq.fake_quant.quant_max + + sin_int = torch.round( + self.mllm_max_sin_embedding / sin_scale + sin_zero_point + ).clamp(sin_quant_min, sin_quant_max) + self.mllm_max_sin_embedding = 
sin_int.to(torch.uint16) + + cos_int = torch.round( + self.mllm_max_cos_embedding / cos_scale + cos_zero_point + ).clamp(cos_quant_min, cos_quant_max) + self.mllm_max_cos_embedding = cos_int.to(torch.uint16) + @check_model_inputs() @auto_docstring def forward( diff --git a/pymllm/backends/qualcomm/transformers/qwen3/runner.py b/pymllm/backends/qualcomm/transformers/qwen3/runner.py index 37f8bae16..7c36940ab 100644 --- a/pymllm/backends/qualcomm/transformers/qwen3/runner.py +++ b/pymllm/backends/qualcomm/transformers/qwen3/runner.py @@ -31,6 +31,15 @@ def enable_qdq_observer(m): m.enable_observer() +def convert_weight(m): + if ( + isinstance(m, QLinearLPBQ) + or isinstance(m, QLinearW8A16_PerChannelSym) + or isinstance(m, QRMSNorm) + ): + m.convert_to_deploy() + + class Qwen3Quantizer: def __init__(self, model_path: str, mllm_qualcomm_max_length=2048): self.tokenizer = AutoTokenizer.from_pretrained(model_path) @@ -167,3 +176,7 @@ def calibrate(self, num_samples=64, max_seq_length=512): # 4. Close Observer, freeze calibrated quantization parameters self.freeze_activation() print("\nCalibration completed, activation quantization parameters frozen.") + + def convert(self): + self.model.apply(convert_weight) + self.model.model.convert_rope_for_deploy() diff --git a/pymllm/backends/qualcomm/transformers/qwen3/train.py b/pymllm/backends/qualcomm/transformers/qwen3/train.py index 8432e4812..13ad2785a 100644 --- a/pymllm/backends/qualcomm/transformers/qwen3/train.py +++ b/pymllm/backends/qualcomm/transformers/qwen3/train.py @@ -40,9 +40,14 @@ def main(): m.calibrate(num_samples=args.num_samples, max_seq_length=args.max_length) # m.compile() m.infer(args.infer_text) + + # !!! + # Things below is for deploy. We will turn all fp32 weights and some buffers(rope) to quantized dtype. + # !!! 
m.model.lm_head.weight = torch.nn.Parameter( m.model.model.embed_tokens.weight.clone() ) + m.convert() os.makedirs(args.output_dir, exist_ok=True) model_save_path = os.path.join(args.output_dir, "model.safetensors") diff --git a/pymllm/convertor/mllm_type_mapping.py b/pymllm/convertor/mllm_type_mapping.py index 0b98b7e6e..05ea544c2 100644 --- a/pymllm/convertor/mllm_type_mapping.py +++ b/pymllm/convertor/mllm_type_mapping.py @@ -91,6 +91,7 @@ torch.qint8: 16, # kInt8 torch.quint8: 129, # kUInt8 torch.qint32: 18, # kInt32 + torch.uint16: 130, # kUInt16 } ) From 82900b15bdf8e05c1456742a68b63df359beb122 Mon Sep 17 00:00:00 2001 From: chenghuaWang <2923277184@qq.com> Date: Mon, 5 Jan 2026 08:55:55 +0000 Subject: [PATCH 11/13] fix: mismatched outputs and inputs --- mllm/backends/qnn/aot/passes/PTQPass.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mllm/backends/qnn/aot/passes/PTQPass.cpp b/mllm/backends/qnn/aot/passes/PTQPass.cpp index 0539b23a2..8bd523641 100644 --- a/mllm/backends/qnn/aot/passes/PTQPass.cpp +++ b/mllm/backends/qnn/aot/passes/PTQPass.cpp @@ -181,7 +181,7 @@ void recursiveSolveNormal(const std::shared_ptr& ir_ctx, const ir auto outputs = op->outputs(); for (auto iii : inputs) { __recursiveSolveNormalImpl(iii->cast_()); } - for (auto ooo : inputs) { __recursiveSolveNormalImpl(ooo->cast_()); } + for (auto ooo : outputs) { __recursiveSolveNormalImpl(ooo->cast_()); } } if (op->isa_()) { From 54d9927cb8d31cbb0016b0064ae5583925fb0f16 Mon Sep 17 00:00:00 2001 From: chenghuaWang <2923277184@qq.com> Date: Mon, 5 Jan 2026 10:13:57 +0000 Subject: [PATCH 12/13] fix: typos --- .../qnn/aot/passes/LLMQuantRecipePass.cpp | 6 +- mllm/backends/qnn/aot/passes/PTQPass.cpp | 6 +- mllm/core/OpTypes.hpp | 1 + .../qualcomm/transformers/core/rms_norm.py | 2 +- .../transformers/core/test_qlinear.py | 89 ------------------- 5 files changed, 8 insertions(+), 96 deletions(-) delete mode 100644 pymllm/backends/qualcomm/transformers/core/test_qlinear.py diff 
--git a/mllm/backends/qnn/aot/passes/LLMQuantRecipePass.cpp b/mllm/backends/qnn/aot/passes/LLMQuantRecipePass.cpp index f60ecc14d..3b7291931 100644 --- a/mllm/backends/qnn/aot/passes/LLMQuantRecipePass.cpp +++ b/mllm/backends/qnn/aot/passes/LLMQuantRecipePass.cpp @@ -389,7 +389,7 @@ bool LLMQuantRecipeRMSNormPattern::rewrite(ir::IRWriter& writer, const ir::op_pt // FIXME: This dtype is hardcoded. We should make it right. auto weight_spec_attr = writer.create( - ir::linalg::QuantizationSpecAsymPerTensor::create(0, 65536, kUInt16, kFloat32, kInt32, Tensor::nil(), Tensor::nil())); + ir::linalg::QuantizationSpecAsymPerTensor::create(0, 65536 - 1, kUInt16, kFloat32, kInt32, Tensor::nil(), Tensor::nil())); weight_reg_tensor_ir->outputs().front()->setAttr("quant_recipe", weight_spec_attr); // Get self anno @@ -767,7 +767,7 @@ bool LLMQuantRecipeLinearPattern::rewrite(ir::IRWriter& writer, const ir::op_ptr ir::linalg::QuantizationSpecLPBQ::create(-8, 7, block_size, -1, 4, kUInt4, kFloat32, Tensor::nil(), Tensor::nil()); // output sym int16 - auto out_quant_spec = ir::linalg::QuantizationSpecAsymPerTensor::create(0, 65536, kUInt16, kFloat32, kInt32, + auto out_quant_spec = ir::linalg::QuantizationSpecAsymPerTensor::create(0, 65536 - 1, kUInt16, kFloat32, kInt32, Tensor::nil(), Tensor::nil()); linear_ir->outputs().front()->setAttr("quant_recipe", writer.create(out_quant_spec)); @@ -867,7 +867,7 @@ bool LLMQuantRecipeEmbeddingPattern::rewrite(ir::IRWriter& writer, const ir::op_ o_0->setAttr("quant_recipe", o_0_spec); annotation_attr->annotation_.outputs.emplace_back(o_0_spec->spec_); } else { - annotation_attr->annotation_.inputs.emplace_back( + annotation_attr->annotation_.outputs.emplace_back( o_0->getAttr("quant_recipe")->cast_()->spec_); } diff --git a/mllm/backends/qnn/aot/passes/PTQPass.cpp b/mllm/backends/qnn/aot/passes/PTQPass.cpp index 8bd523641..ea95c28c7 100644 --- a/mllm/backends/qnn/aot/passes/PTQPass.cpp +++ b/mllm/backends/qnn/aot/passes/PTQPass.cpp @@ -132,7 
+132,7 @@ void recursiveSolveWeights(const std::shared_ptr& ir_ctx, const i }); } -void __recursiveSolveNormalImpl(const ir::Val::ptr_t& v) { +void _recursiveSolveNormalImpl(const ir::Val::ptr_t& v) { MLLM_RT_ASSERT(v->isa_()); auto tv = v->cast_(); MLLM_RT_ASSERT(tv->getAttr("quant_recipe")); @@ -180,8 +180,8 @@ void recursiveSolveNormal(const std::shared_ptr& ir_ctx, const ir auto inputs = op->inputs(); auto outputs = op->outputs(); - for (auto iii : inputs) { __recursiveSolveNormalImpl(iii->cast_()); } - for (auto ooo : outputs) { __recursiveSolveNormalImpl(ooo->cast_()); } + for (auto iii : inputs) { _recursiveSolveNormalImpl(iii->cast_()); } + for (auto ooo : outputs) { _recursiveSolveNormalImpl(ooo->cast_()); } } if (op->isa_()) { diff --git a/mllm/core/OpTypes.hpp b/mllm/core/OpTypes.hpp index 849df8941..310b39cd0 100644 --- a/mllm/core/OpTypes.hpp +++ b/mllm/core/OpTypes.hpp @@ -180,6 +180,7 @@ inline std::string optype2Str(OpTypes type) { case OpTypes::kRadixAttnRelax: return "RadixAttnRelax"; case OpTypes::kEqual: return "Equal"; case OpTypes::kWhere: return "Where"; + case OpTypes::kSigmoid: return "Sigmoid"; case OpTypes::kDynamicOp_Start: return "DynamicOp_Start"; case OpTypes::kOpType_End: return "OpType_End"; default: return "Unknown"; diff --git a/pymllm/backends/qualcomm/transformers/core/rms_norm.py b/pymllm/backends/qualcomm/transformers/core/rms_norm.py index ec6345d64..eb3d34b70 100644 --- a/pymllm/backends/qualcomm/transformers/core/rms_norm.py +++ b/pymllm/backends/qualcomm/transformers/core/rms_norm.py @@ -120,4 +120,4 @@ def disable_quant(self): self.weight_fake_quant.disable_fakequant() def extra_repr(self): - return f"{tuple(self.weight.shape)}, eps={self.variance_epsilon}" + return f"{tuple(self.weight.shape)}, eps={self.eps}" diff --git a/pymllm/backends/qualcomm/transformers/core/test_qlinear.py b/pymllm/backends/qualcomm/transformers/core/test_qlinear.py deleted file mode 100644 index 69edd69f6..000000000 --- 
a/pymllm/backends/qualcomm/transformers/core/test_qlinear.py +++ /dev/null @@ -1,89 +0,0 @@ -import torch -import torch.nn as nn -from pymllm.backends.qualcomm.transformers.core.qlinear import QLinearLPBQ - - -def test_qlinear_lpbq(): - """ - Test QLinearLPBQ implementation against bf16 baseline. - - This test verifies that the double quantization implementation - produces results close to the bf16 baseline when using appropriate - quantization parameters. - """ - # Set random seed for reproducibility - torch.manual_seed(42) - - # Test parameters - in_features = 256 - out_features = 128 - batch_size = 4 - seq_len = 16 - block_size = 64 - - # Create input tensor (bf16 baseline) - x_bf16 = torch.randn(batch_size, seq_len, in_features, dtype=torch.bfloat16) - - # Create reference linear layer (bf16) - linear_bf16 = nn.Linear(in_features, out_features, bias=True, dtype=torch.bfloat16) - # Copy weights and bias to ensure same values - with torch.no_grad(): - linear_bf16.weight.copy_( - torch.randn(out_features, in_features, dtype=torch.bfloat16) - ) - linear_bf16.bias.copy_(torch.zeros(out_features, dtype=torch.bfloat16)) - - # Get bf16 reference output - with torch.no_grad(): - output_bf16 = linear_bf16(x_bf16) - - # Create QLinearLPBQ with same weights - qlinear = QLinearLPBQ( - in_features=in_features, - out_features=out_features, - bias=True, - block_size=block_size, - already_quantized_weight=False, - already_quantized_activation=False, - ) - - # Copy the same weights and bias - with torch.no_grad(): - qlinear.weight.copy_(linear_bf16.weight.data) - if qlinear.bias is not None: - qlinear.bias.copy_(linear_bf16.bias.data) - - # Get quantized output - with torch.no_grad(): - output_q = qlinear(x_bf16) - output_q_bf16 = output_q - - # Calculate metrics - mse = torch.mean((output_bf16 - output_q_bf16) ** 2) - mae = torch.mean(torch.abs(output_bf16 - output_q_bf16)) - - # Calculate relative error - relative_error = torch.mean( - torch.abs(output_bf16 - output_q_bf16) / 
(torch.abs(output_bf16) + 1e-8) - ) - - # Print results - print("=== QLinearLPBQ Test Results ===") - print(f"Input shape: {x_bf16.shape}") - print(f"Output shape: {output_bf16.shape}") - print(f"Block size: {block_size}") - print("\nComparison with bf16 baseline:") - print(f"MSE: {mse:.6e}") - print(f"MAE: {mae:.6e}") - print(f"Relative Error: {relative_error:.6e}") - - # Check if results are within acceptable tolerance - # For double quantization, we expect some error but should be reasonable - tolerance = 0.1 # 10% relative error tolerance - - if relative_error < tolerance: - print(f"\n✓ TEST PASSED: Relative error {relative_error:.6e} < {tolerance}") - return True - else: - print(f"\n✗ TEST FAILED: Relative error {relative_error:.6e} >= {tolerance}") - return False From fb4075936b89f64b17af1c5aca6d9478694cad4f Mon Sep 17 00:00:00 2001 From: chenghuaWang <2923277184@qq.com> Date: Mon, 5 Jan 2026 11:10:26 +0000 Subject: [PATCH 13/13] fix: typos --- mllm/backends/qnn/aot/passes/LLMQuantRecipePass.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mllm/backends/qnn/aot/passes/LLMQuantRecipePass.cpp b/mllm/backends/qnn/aot/passes/LLMQuantRecipePass.cpp index 3b7291931..37fdaffec 100644 --- a/mllm/backends/qnn/aot/passes/LLMQuantRecipePass.cpp +++ b/mllm/backends/qnn/aot/passes/LLMQuantRecipePass.cpp @@ -857,6 +857,8 @@ bool LLMQuantRecipeEmbeddingPattern::rewrite(ir::IRWriter& writer, const ir::op_ if (!i_0->getAttr("quant_recipe")) { auto i_0_spec = genSimpleQuantizationSpecAttr(writer.getContext(), i_0->cast_()); i_0->setAttr("quant_recipe", i_0_spec); + annotation_attr->annotation_.inputs.emplace_back( + i_0->getAttr("quant_recipe")->cast_()->spec_); } else { annotation_attr->annotation_.inputs.emplace_back( i_0->getAttr("quant_recipe")->cast_()->spec_);